ROSE  0.11.31.0
BinaryControlFlow.h
1 #ifndef ROSE_BinaryAnalysis_ControlFlow_H
2 #define ROSE_BinaryAnalysis_ControlFlow_H
3 
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 
7 #include "Map.h"
8 #include "WorkLists.h"
9 #include "SageBuilderAsm.h"
10 
11 #include <boost/foreach.hpp>
12 #include <boost/graph/adjacency_list.hpp>
13 #include <boost/graph/reverse_graph.hpp>
14 #include <boost/graph/depth_first_search.hpp>
15 #include <Sawyer/GraphBoost.h>
16 
17 class SgNode;
18 class SgAsmBlock;
19 
20 namespace Rose {
21 
28 namespace BinaryAnalysis {
29 
140 class ControlFlow {
141 public:
142  ControlFlow()
143  : vertex_filter(NULL), edge_filter(NULL)
144  {}
145 
146 
165  typedef boost::adjacency_list<boost::setS, /* edges of each vertex in std::list */
166  boost::vecS, /* vertices in std::vector */
167  boost::bidirectionalS,
168  boost::property<boost::vertex_name_t, SgAsmBlock*> > BlockGraph;
169 
188  typedef boost::adjacency_list<boost::setS,
189  boost::vecS,
190  boost::bidirectionalS,
191  boost::property<boost::vertex_name_t, SgAsmInstruction*> > InsnGraph;
192 
195  typedef BlockGraph Graph;
196 
197 
198  /**********************************************************************************************************************
199  * Filters
200  **********************************************************************************************************************/
201 public:
202 
207  class VertexFilter {
208  public:
209  virtual ~VertexFilter() {}
210  virtual bool operator()(ControlFlow*, SgAsmNode*) = 0;
211  };
212 
217  class EdgeFilter {
218  public:
219  virtual ~EdgeFilter() {}
220  virtual bool operator()(ControlFlow*, SgAsmNode *source, SgAsmNode *target) = 0;
221  };
222 
229  void set_vertex_filter(VertexFilter *filter) { vertex_filter = filter; }
230  VertexFilter *get_vertex_filter() const { return vertex_filter; }
239  void set_edge_filter(EdgeFilter *filter) { edge_filter = filter; }
240  EdgeFilter *get_edge_filter() const { return edge_filter; }
248  bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter) { return filter && !(*filter)(this, bb_or_insn); }
249  bool is_vertex_filtered(SgAsmNode *bb_or_insn) { return is_vertex_filtered(bb_or_insn, vertex_filter); }
258  bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter) {
259  return filter && !(*filter)(this, src, dst);
260  }
262  return is_edge_filtered(src, dst, edge_filter);
263  }
266 protected:
267  VertexFilter *vertex_filter;
268  EdgeFilter *edge_filter;
269 
270  /**********************************************************************************************************************
271  * Methods that modify the AST
272  **********************************************************************************************************************/
273 public:
274 
281  void clear_ast(SgNode *ast);
282 
297  template<class ControlFlowGraph>
298  void apply_to_ast(const ControlFlowGraph&);
299 
313  template<class ControlFlowGraph>
314  void cache_vertex_descriptors(const ControlFlowGraph&);
315 
316  /**********************************************************************************************************************
317  * Graph construction methods
318  **********************************************************************************************************************/
319 public:
320 
346  template<class ControlFlowGraph>
347  ControlFlowGraph build_block_cfg_from_ast(SgNode *root);
348 
349  template<class ControlFlowGraph>
350  void build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
351 
352  template<class ControlFlowGraph>
353  ControlFlowGraph build_insn_cfg_from_ast(SgNode *root);
354 
355  template<class ControlFlowGraph>
356  void build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
361  template<class BlockCFG, class InsnCFG>
362  void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/);
368  template<class InsnCFG>
369  void fixup_fcall_fret(InsnCFG &cfg/*in,out*/, bool preserve_call_fallthrough_edges);
370 
380  template<class ControlFlowGraph>
381  ControlFlowGraph build_cg_from_ast(SgNode *root);
382 
383  template<class ControlFlowGraph>
384  void build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
397  template<class ControlFlowGraph>
398  ControlFlowGraph copy(const ControlFlowGraph &src);
399 
400  template<class ControlFlowGraph>
401  void copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/);
404  /***********************************************************************************************************************
405  * Graph output
406  ***********************************************************************************************************************/
407 
409  template<class CFG>
411  std::vector<typename boost::graph_traits<CFG>::vertex_descriptor> vertices;
412  std::vector<typename boost::graph_traits<CFG>::edge_descriptor> edges;
413  };
414 
416  template<class CFG>
418  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::vertex_descriptor vertex) const {}
419  };
420 
422  template<class CFG>
424  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::edge_descriptor vertex) const {}
425  };
426 
429  template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
430  void write_graphviz(std::ostream&, const CFG&, const VertexPropertyWriter&, const EdgePropertyWriter&);
431 
432  template<typename CFG>
433  void write_graphviz(std::ostream &out, const CFG &cfg) {
435  }
436 
437  template<typename CFG, class VertexPropertyWriter>
438  void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw) {
440  }
443  /**********************************************************************************************************************
444  * Miscellaneous members
445  **********************************************************************************************************************/
446 
447 private:
448  /* Visitor used by flow_order(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
449  template<class ControlFlowGraph>
450  struct FlowOrder: public boost::default_dfs_visitor {
451  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
452  typedef std::vector<Vertex> VertexList;
453  typedef std::vector<size_t> ReverseVertexList;
454  VertexList *forward_order;
455  FlowOrder(VertexList *forward_order): forward_order(forward_order) {}
456  void compute(const ControlFlowGraph &g, Vertex v0, ReverseVertexList *reverse_order);
457  void finish_vertex(Vertex v, ControlFlowGraph g);
458  };
459 
460  /* Helper class for build_block_cfg_from_ast(). Adds vertices to its 'cfg' member. Vertices are any SgAsmBlock that
461  * contains at least one SgAsmInstruction. */
462  template<class ControlFlowGraph>
463  class VertexInserter: public AstSimpleProcessing {
464  public:
465  ControlFlow *analyzer;
466  ControlFlowGraph &cfg;
467  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
468  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
469  BlockVertexMap &bv_map;
470  VertexInserter(ControlFlow *analyzer, ControlFlowGraph &cfg, BlockVertexMap &bv_map)
471  : analyzer(analyzer), cfg(cfg), bv_map(bv_map)
472  {}
473  // Add basic block to graph if it hasn't been added already.
474  void conditionally_add_vertex(SgAsmBlock *block);
475 
476  void visit(SgNode *node) {
477  if (isSgAsmFunction(node)) {
478  // Add the function entry block before the other blocks of the function. This ensures that the entry block
479  // of a function has a lower vertex number than the other blocks of the function (the traversal is not
480  // guaranteed to visit the function basic blocks in that order).
481  conditionally_add_vertex(isSgAsmFunction(node)->get_entry_block());
482  } else {
483  conditionally_add_vertex(isSgAsmBlock(node));
484  }
485  }
486  };
487 
488 public:
521  template<class ControlFlowGraph>
522  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
523  flow_order(const ControlFlowGraph&,
524  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
525  std::vector<size_t> *reverse_order=NULL);
526 
527 private:
528  /* Visitor used by return_blocks(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
529  template<class ControlFlowGraph>
530  struct ReturnBlocks: public boost::default_dfs_visitor {
531  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
532  typedef std::vector<Vertex> Vector;
533  Vector &blocks;
534  ReturnBlocks(Vector &blocks): blocks(blocks) {}
535  void finish_vertex(Vertex v, ControlFlowGraph g);
536  };
537 
538 public:
546  template<class ControlFlowGraph>
547  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
548  return_blocks(const ControlFlowGraph &cfg,
549  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start);
550 };
551 
552 
553 /*******************************************************************************************************************************
554  * Functions
555  *******************************************************************************************************************************/
556 
559 template<class V, class E>
561 get_ast_node(const Sawyer::Container::Graph<V, E> &cfg, size_t vertexId) {
562  typedef typename Sawyer::Container::Graph<V, E> CFG;
563  typename CFG::ConstVertexValueIterator iter = cfg.findVertex(vertexId);
564  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
565  return *iter;
566 }
567 
570 template<class V, class E, class AstNode>
571 void
572 put_ast_node(Sawyer::Container::Graph<V, E> &cfg, size_t vertexId, AstNode *astNode) {
573  typedef typename Sawyer::Container::Graph<V, E> CFG;
574  typename CFG::VertexValueIterator iter = cfg.findVertex(vertexId);
575  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
576  *iter = astNode;
577 }
578 
579 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
580 template<class A, class B, class C, class D, class E, class F, class G>
581 typename boost::property_traits<typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>,
582  boost::vertex_name_t>::type>::value_type
583 get_ast_node(const boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
584  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex) {
585  return boost::get(boost::vertex_name, cfg, vertex);
586 }
587 
588 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
589 template<class A, class B, class C, class D, class E, class F, class G>
590 void
591 put_ast_node(boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
592  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex,
593  typename boost::property_traits<
594  typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>, boost::vertex_name_t>::type
595  >::value_type ast_node) {
596  boost::put(boost::vertex_name, cfg, vertex, ast_node);
597 }
598 
599 /******************************************************************************************************************************
600  * Function template definitions
601  ******************************************************************************************************************************/
602 
603 template<class ControlFlowGraph>
604 void
605 ControlFlow::apply_to_ast(const ControlFlowGraph &cfg)
606 {
607  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
608  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
609  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
610  if (!block || is_vertex_filtered(block))
611  continue;
612 
613  /* Delete old targets */
614  const SgAsmIntegerValuePtrList &targets = block->get_successors();
615  for (SgAsmIntegerValuePtrList::const_iterator ti=targets.begin(); ti!=targets.end(); ++ti)
616  delete *ti;
617 
618  /* Add new targets */
619  block->set_successors_complete(true);
620  block->get_successors().clear();
621  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
622  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
623  SgAsmBlock *target_block = get_ast_node(cfg, boost::target(*ei, cfg));
624  if (target_block && !is_edge_filtered(block, target_block)) {
625  SgAsmIntegerValueExpression *target = SageBuilderAsm::buildValueU64(target_block->get_address());
626  target->makeRelativeTo(target_block);
627  target->set_parent(block);
628  block->get_successors().push_back(target);
629  }
630  }
631  }
632 }
633 
634 template<class ControlFlowGraph>
635 void
636 ControlFlow::cache_vertex_descriptors(const ControlFlowGraph &cfg)
637 {
638  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
639  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
640  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
641  if (block && !is_vertex_filtered(block))
642  block->set_cached_vertex(*vi);
643  }
644 }
645 
646 template<class ControlFlowGraph>
647 void
648 ControlFlow::VertexInserter<ControlFlowGraph>::conditionally_add_vertex(SgAsmBlock *block)
649 {
650  if (block && block->has_instructions() && !analyzer->is_vertex_filtered(block) && !bv_map.exists(block)) {
651  Vertex vertex = boost::add_vertex(cfg);
652  bv_map[block] = vertex;
653  put_ast_node(cfg, vertex, block);
654  }
655 }
656 
657 template<class ControlFlowGraph>
658 void
659 ControlFlow::build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
660 {
661  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
662  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
663  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
664  BlockVertexMap bv_map;
665 
666  // Add the vertices
667  cfg.clear();
668  VertexInserter<ControlFlowGraph>(this, cfg, bv_map).traverse(root, preorder);
669 
670  // Mapping from block entry address to CFG vertex
671  Map<rose_addr_t, Vertex> addrToVertex;
672  for (typename BlockVertexMap::iterator bvi=bv_map.begin(); bvi!=bv_map.end(); ++bvi)
673  addrToVertex[bvi->first->get_address()] = bvi->second;
674 
675  // Add the edges
676  BOOST_FOREACH (Vertex sourceVertex, boost::vertices(cfg)) {
677  SgAsmBlock *sourceBlock = get_ast_node(cfg, sourceVertex);
678  BOOST_FOREACH (SgAsmIntegerValueExpression *integerValue, sourceBlock->get_successors()) {
679  Vertex targetVertex = addrToVertex.get_value_or(integerValue->get_absoluteValue(), NO_VERTEX);
680  if (targetVertex!=NO_VERTEX) {
681  SgAsmBlock *targetBlock = get_ast_node(cfg, targetVertex);
682  assert(targetBlock!=NULL); // since we have a vertex, there must be an SgAsmBlock!
683  if (!is_edge_filtered(sourceBlock, targetBlock))
684  boost::add_edge(sourceVertex, targetVertex, cfg);
685  }
686  }
687  }
688 }
689 
690 template<class ControlFlowGraph>
691 void
692 ControlFlow::build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
693 {
694  BlockGraph cfgb;
695  build_block_cfg_from_ast(root, cfgb);
696  explode_blocks(cfgb, cfg);
697  bool preserve_call_fallthrough_edges = false;
698  fixup_fcall_fret(cfg, preserve_call_fallthrough_edges);
699 }
700 
701 template<class ControlFlowGraph>
702 void
703 ControlFlow::build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/)
704 {
705  struct T1: public EdgeFilter {
706  EdgeFilter *parent;
707  T1(EdgeFilter *parent): parent(parent) {}
708  bool operator()(ControlFlow *analyzer, SgAsmNode *src, SgAsmNode *dst) {
709  SgAsmFunction *src_func = SageInterface::getEnclosingNode<SgAsmFunction>(src, true);
710  SgAsmBlock *dst_block = SageInterface::getEnclosingNode<SgAsmBlock>(dst, true);
711  SgAsmFunction *dst_func = SageInterface::getEnclosingNode<SgAsmFunction>(dst_block);
712  if (!src_func || !dst_func || dst_block!=dst_func->get_entry_block()) {
713  return false;
714  } else if (src_func!=dst_func) {
715  // inter-function call, not a return edge
716  } else {
717  // FIXME: this might not actually be a recursive call [Robb P. Matzke 2013-09-05]
718  }
719  return parent ? (*parent)(analyzer, src, dst) : true;
720  }
721  };
722 
723  EdgeFilter *parent = get_edge_filter();
724  T1 edge_filter(parent);
725  try {
726  set_edge_filter(&edge_filter);
727  build_block_cfg_from_ast(root, cfg);
728  } catch (...) {
729  set_edge_filter(parent);
730  throw;
731  }
732  set_edge_filter(parent);
733 }
734 
735 template<class ControlFlowGraph>
736 void
737 ControlFlow::copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/)
738 {
739  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
740  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
741 
742  dst.clear();
743  std::vector<Vertex> src_to_dst(boost::num_vertices(src), NO_VERTEX);
744 
745  typename boost::graph_traits<const ControlFlowGraph>::vertex_iterator vi, vi_end;
746  for (boost::tie(vi, vi_end)=boost::vertices(src); vi!=vi_end; ++vi) {
747  SgAsmNode *node = get_ast_node(src, *vi);
748  if (!is_vertex_filtered(node)) {
749  src_to_dst[*vi] = boost::add_vertex(dst);
750  put_ast_node(dst, src_to_dst[*vi], get_ast_node(src, *vi));
751  }
752  }
753 
754  typename boost::graph_traits<const ControlFlowGraph>::edge_iterator ei, ei_end;
755  for (boost::tie(ei, ei_end)=boost::edges(src); ei!=ei_end; ++ei) {
756  if (NO_VERTEX!=src_to_dst[boost::source(*ei, src)] && NO_VERTEX!=src_to_dst[boost::target(*ei, src)]) {
757  SgAsmNode *node1 = get_ast_node(src, boost::source(*ei, src));
758  SgAsmNode *node2 = get_ast_node(src, boost::target(*ei, src));
759  if (!is_edge_filtered(node1, node2))
760  boost::add_edge(src_to_dst[boost::source(*ei, src)], src_to_dst[boost::target(*ei, src)], dst);
761  }
762  }
763 }
764 
765 template<class ControlFlowGraph>
766 ControlFlowGraph
767 ControlFlow::copy(const ControlFlowGraph &src)
768 {
769  ControlFlowGraph dst;
770  copy(src, dst);
771  return dst;
772 }
773 
774 template<class BlockCFG, class InsnCFG>
775 void
776 ControlFlow::explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/)
777 {
778  // BlockCFG is the basic-block binary control flow graph
779  typedef typename boost::graph_traits<const BlockCFG>::vertex_descriptor BlockCFG_Vertex;
780  typedef typename boost::graph_traits<const BlockCFG>::vertex_iterator BlockCFG_VertexIterator;
781  typedef typename boost::graph_traits<const BlockCFG>::edge_iterator BlockCFG_EdgeIterator;
782 
783  // InsnCFG is the instruction binary control flow graph--it points to instructions rather than basic blocks, and changes
784  // some edges regarding function calls.
785  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor InsnCFG_Vertex;
786  typedef std::pair<InsnCFG_Vertex, InsnCFG_Vertex> InsnCFG_VertexPair;
787 
788  // Expand the cfgb basic blocks to create a cfgi that has instructions instead of blocks, and add the intra-block edges
789  cfgi.clear();
790  Map<BlockCFG_Vertex, InsnCFG_VertexPair> vertex_translation; // enter and leave instructions for each of the blocks in cfgb
791  {
792  BlockCFG_VertexIterator vi, vi_end;
793  for (boost::tie(vi, vi_end)=boost::vertices(cfgb); vi!=vi_end; ++vi) {
794  SgAsmBlock *blk = get_ast_node(cfgb, *vi);
795  const SgAsmStatementPtrList &insns = blk->get_statementList();
796  assert(!insns.empty());
797  InsnCFG_Vertex enter_vertex = boost::graph_traits<InsnCFG>::null_vertex();
798  InsnCFG_Vertex prev_vertex = boost::graph_traits<InsnCFG>::null_vertex();
799  for (SgAsmStatementPtrList::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
800  SgAsmInstruction *insn = isSgAsmInstruction(*ii);
801  assert(insn!=NULL); // basic blocks contain only instructions, no other type of asm statement
802  InsnCFG_Vertex vertex = boost::add_vertex(cfgi);
803  put_ast_node(cfgi, vertex, insn);
804  if (ii==insns.begin()) {
805  enter_vertex = vertex;
806  } else {
807  boost::add_edge(prev_vertex, vertex, cfgi);
808  }
809  prev_vertex = vertex;
810  }
811  assert(prev_vertex!=boost::graph_traits<InsnCFG>::null_vertex()); // basic block had no instructions but was in CFG!
812  vertex_translation[*vi] = InsnCFG_VertexPair(enter_vertex, prev_vertex);
813  }
814  }
815 
816  // Insert the edges from cfgb. The corresponding edge in cfgi must emanate from the final instruction of the source basic
817  // block and enter at the first instruction of the target basic block.
818  {
819  BlockCFG_EdgeIterator ei, ei_end;
820  for (boost::tie(ei, ei_end)=boost::edges(cfgb); ei!=ei_end; ++ei) {
821  InsnCFG_Vertex src_leave_vertex = vertex_translation.get_one(boost::source(*ei, cfgb)).second;
822  InsnCFG_Vertex dst_enter_vertex = vertex_translation.get_one(boost::target(*ei, cfgb)).first;
823  assert(src_leave_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
824  assert(dst_enter_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
825  boost::add_edge(src_leave_vertex, dst_enter_vertex, cfgi);
826  }
827  }
828 }
829 
830 template<class InsnCFG>
831 void
832 ControlFlow::fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
833 {
834  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor CFG_Vertex;
835  typedef typename boost::graph_traits<InsnCFG>::vertex_iterator CFG_VertexIterator;
836  typedef typename boost::graph_traits<InsnCFG>::in_edge_iterator CFG_InEdgeIterator;
837  typedef std::pair<CFG_Vertex, CFG_Vertex> CFG_VertexPair;
838  typedef Map<SgAsmInstruction*, CFG_Vertex> InsnToVertex;
839  CFG_Vertex NO_VERTEX = boost::graph_traits<InsnCFG>::null_vertex();
840 
841  // Build mappings needed later and find the function return points. We just look for the x86
842  // RET instruction for now and assume that each one we find is a return if it has no control flow successors. They have no
843  // successors at this point because CFG1 didn't have any.
844  InstructionMap insns;
845  InsnToVertex insn_to_vertex;
846  std::vector<bool> isret(boost::num_vertices(cfg), false);
847  {
848  CFG_VertexIterator vi, vi_end;
849  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
850  SgAsmInstruction *insn = get_ast_node(cfg, *vi);
851  insns[insn->get_address()] = insn;
852  insn_to_vertex[insn] = *vi;
853 
854  if (0==boost::out_degree(*vi, cfg)) {
855  // FIXME: Architecture-specific code here
856  if (SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn)) {
857  isret[*vi] = x86_ret==insn_x86->get_kind();
858  }
859  }
860  }
861  }
862 
863  // Return the entry vertex for a function that owns the indicated instruction
864  struct FunctionEntryVertex {
865  const InsnToVertex &insn_to_vertex;
866  const InstructionMap &imap;
867  FunctionEntryVertex(const InsnToVertex &insn_to_vertex, const InstructionMap &imap)
868  : insn_to_vertex(insn_to_vertex), imap(imap) {}
869  CFG_Vertex operator()(SgAsmInstruction *insn) {
870  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn, true);
871  SgAsmInstruction *entry_insn = imap.get_one(func->get_entry_va());
872  CFG_Vertex entry_vertex = insn_to_vertex.get_one(entry_insn);
873  return entry_vertex;
874  }
875  } function_entry_vertex(insn_to_vertex, insns);
876 
877  // Process each return site in order to add edges from the return site to the vertex representing the return address
878  std::vector<CFG_VertexPair> edges_to_insert, edges_to_erase;
879  {
880  CFG_VertexIterator vi, vi_end;
881  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
882  CFG_Vertex returner_vertex = *vi;
883  if (!isret[returner_vertex])
884  continue;
885  SgAsmInstruction *returner_insn = get_ast_node(cfg, returner_vertex);
886 
887  // Find all of the true call sites for the function that owns the returner instruction (e.g., RET) by recursively
888  // following inter-function CFG edges until we find the true calls (those edges that follow CALL semantics).
889  // Inter-function CFG edges can represent true calls or simply inter-function branches such as thunks. We have to
890  // gather up the information without adding it to the CFG yet (can't add while we're iterating)
891  std::vector<bool> seen(boost::num_vertices(cfg), false);
892  WorkList<CFG_Vertex> worklist; // targets of inter-function CFG edges; function callees
893  worklist.push(function_entry_vertex(returner_insn));
894  while (!worklist.empty()) {
895  CFG_Vertex callee_vertex = worklist.shift();
896  CFG_InEdgeIterator ei, ei_end;
897  for (boost::tie(ei, ei_end)=boost::in_edges(callee_vertex, cfg); ei!=ei_end; ++ei) {
898  CFG_Vertex caller_vertex = boost::source(*ei, cfg); // caller is a inter-function call or branch site
899  if (!seen[caller_vertex]) {
900  seen[caller_vertex] = true;
901  SgAsmInstruction *caller_insn = get_ast_node(cfg, caller_vertex);
902  SgAsmBlock *caller_block = SageInterface::getEnclosingNode<SgAsmBlock>(caller_insn);
903  assert(caller_block!=NULL);
904  rose_addr_t target_va, returnee_va; // returnee_va is usually the call's fall-through address
905  if (caller_block->is_function_call(target_va/*out*/, returnee_va/*out*/)) {
906  // This is a true call, so we need to add a return edge from the return instruction (the
907  // "returner") to what is probably the fall-through address of the call site (the returnee).
908  SgAsmInstruction *returnee_insn = insns.get_value_or(returnee_va, NULL);
909  CFG_Vertex returnee_vertex = insn_to_vertex.get_value_or(returnee_insn, NO_VERTEX);
910  if (returnee_vertex!=NO_VERTEX) {
911  edges_to_insert.push_back(CFG_VertexPair(returner_vertex, returnee_vertex));
912  edges_to_erase.push_back(CFG_VertexPair(caller_vertex, returnee_vertex));
913  }
914  } else {
915  // This is a non-call inter-function edge; probably a thunk. We need to find its call sites and add
916  // the returnee addresses (call fall throughs) to the returnee addresses of the RET we're
917  // processing.
918  worklist.push(function_entry_vertex(caller_insn));
919  }
920  }
921  }
922  }
923  }
924  }
925 
926  // Erase and insert edges now that we're done iterating.
927  if (!preserve_call_fallthrough_edges) {
928  for (size_t i=0; i<edges_to_erase.size(); ++i)
929  boost::remove_edge(edges_to_erase[i].first, edges_to_erase[i].second, cfg);
930  }
931  for (size_t i=0; i<edges_to_insert.size(); ++i)
932  boost::add_edge(edges_to_insert[i].first, edges_to_insert[i].second, cfg);
933 }
934 
935 template<class ControlFlowGraph>
936 void
937 ControlFlow::FlowOrder<ControlFlowGraph>::compute(const ControlFlowGraph &g, Vertex v0,
938  ReverseVertexList *reverse_order) {
939  forward_order->clear();
940  std::vector<boost::default_color_type> colors(boost::num_vertices(g), boost::white_color);
941  boost::depth_first_visit(g, v0, *this, &(colors[0]));
942  assert(!forward_order->empty()); /* it should at least contain v0 */
943  std::reverse(forward_order->begin(), forward_order->end());
944  if (reverse_order) {
945  reverse_order->clear();
946  reverse_order->resize(boost::num_vertices(g), INVALID_INDEX);
947  for (size_t i=0; i<forward_order->size(); i++)
948  (*reverse_order)[(*forward_order)[i]] = i;
949  }
950 }
951 
952 template<class ControlFlowGraph>
953 void
954 ControlFlow::FlowOrder<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g) {
955  forward_order->push_back(v);
956 }
957 
958 template<class ControlFlowGraph>
959 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
960 ControlFlow::flow_order(const ControlFlowGraph &cfg,
961  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
962  std::vector<size_t> *reverse_order/*=NULL*/)
963 {
964  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor> forward_order;
965  FlowOrder<ControlFlowGraph>(&forward_order).compute(cfg, start, reverse_order);
966  return forward_order;
967 }
968 
969 template<class ControlFlowGraph>
970 void
971 ControlFlow::ReturnBlocks<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g)
972 {
973  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
974  boost::tie(ei, ei_end) = boost::out_edges(v, g);
975  if (ei==ei_end)
976  blocks.push_back(v);
977 }
978 
979 template<class ControlFlowGraph>
980 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
981 ControlFlow::return_blocks(const ControlFlowGraph &cfg,
982  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start)
983 {
984  typename ReturnBlocks<ControlFlowGraph>::Vector result;
985  ReturnBlocks<ControlFlowGraph> visitor(result);
986  std::vector<boost::default_color_type> colors(boost::num_vertices(cfg), boost::white_color);
987  boost::depth_first_visit(cfg, start, visitor, &(colors[0]));
988  return result;
989 }
990 
991 template<class ControlFlowGraph>
992 ControlFlowGraph
994 {
995  ControlFlowGraph cfg;
996  build_block_cfg_from_ast(root, cfg);
997  return cfg;
998 }
999 
1000 template<class ControlFlowGraph>
1001 ControlFlowGraph
1003 {
1004  ControlFlowGraph cfg;
1005  build_insn_cfg_from_ast(root, cfg);
1006  return cfg;
1007 }
1008 
1009 template<class ControlFlowGraph>
1010 ControlFlowGraph
1012 {
1013  ControlFlowGraph cfg;
1014  build_cg_from_ast(root, cfg);
1015  return cfg;
1016 }
1017 
1019 template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
1020 void
1021 ControlFlow::write_graphviz(std::ostream &out, const CFG &cfg,
1022  const VertexPropertyWriter &vpw, const EdgePropertyWriter &epw)
1023 {
1024  // typedef typename boost::graph_traits<CFG>::vertex_descriptor CFG_Vertex;
1025  typedef typename boost::graph_traits<CFG>::edge_descriptor CFG_Edge;
1026  typedef typename boost::graph_traits<CFG>::vertex_iterator CFG_VertexIterator;
1027  typedef typename boost::graph_traits<CFG>::out_edge_iterator CFG_OutEdgeIterator;
1028 
1029  // Partition the graph into functions and inter-function edges
1031  Functions funcs;
1032  std::vector<CFG_Edge> interfunc_edges;
1033  CFG_VertexIterator vi, vi_end;
1034  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
1035  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(get_ast_node(cfg, *vi), true);
1036  FunctionSubgraphInfo<CFG> &f = funcs[func];
1037  f.vertices.push_back(*vi);
1038  CFG_OutEdgeIterator ei, ei_end;
1039  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
1040  SgNode *tgt_node = get_ast_node(cfg, boost::target(*ei, cfg));
1041  SgAsmFunction *tgt_func = SageInterface::getEnclosingNode<SgAsmFunction>(tgt_node, true);
1042  if (tgt_func==func) {
1043  f.edges.push_back(*ei);
1044  } else {
1045  interfunc_edges.push_back(*ei);
1046  }
1047  }
1048  }
1049 
1050  // Output subgraph info, each function in its own cluster
1051  out <<"digraph G {\n";
1052  for (typename Functions::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi) {
1053  FunctionSubgraphInfo<CFG> &f = fi->second;
1054  if (!f.vertices.empty() || !f.edges.empty()) {
1055  SgNode *node = get_ast_node(cfg, f.vertices.front());
1056  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(node, true);
1057  char cluster_name[64];
1058  sprintf(cluster_name, "cluster_F%" PRIx64, func->get_entry_va());
1059  out <<" subgraph " <<cluster_name <<" {\n"
1060  <<" style=filled;\n"
1061  <<" color=lightgrey;\n"
1062  <<" label=\"Function " <<StringUtility::addrToString(func->get_entry_va())
1063  <<(func->get_name().empty()?std::string(""):(" <"+func->get_name()+">")) <<"\";\n";
1064  for (size_t i=0; i<f.vertices.size(); ++i) {
1065  out <<" " <<f.vertices[i];
1066  vpw(out, f.vertices[i]);
1067  out <<";\n";
1068  }
1069  for (size_t i=0; i<f.edges.size(); ++i) {
1070  out <<" " <<boost::source(f.edges[i], cfg) <<"->" <<boost::target(f.edges[i], cfg);
1071  epw(out, f.edges[i]);
1072  out <<";\n";
1073  }
1074  out <<" }\n"; // subgraph
1075  }
1076  }
1077 
1078  // Inter-function edges
1079  for (size_t i=0; i<interfunc_edges.size(); ++i) {
1080  out <<" " <<boost::source(interfunc_edges[i], cfg) <<"->" <<boost::target(interfunc_edges[i], cfg);
1081  epw(out, interfunc_edges[i]);
1082  out <<";\n";
1083  }
1084  out <<"}\n"; // digraph
1085 }
1086 
1087 } // namespace
1088 } // namespace
1089 
1090 #endif
1091 #endif
BlockGraph Graph
Default control flow graph.
void put_ast_node(Sawyer::Container::Graph< V, E > &cfg, size_t vertexId, AstNode *astNode)
Set the AST node associated with a vertex.
void write_graphviz(std::ostream &out, const CFG &cfg)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
bool is_function_call(rose_addr_t &target_va, rose_addr_t &return_va)
Returns true if basic block appears to be a function call.
Instruction basic block.
Base class for all binary analysis IR nodes.
Class for traversing the AST.
void apply_to_ast(const ControlFlowGraph &)
Applies graph to AST.
void set_cached_vertex(size_t)
Property: Cached vertex for control flow graphs.
Base class for machine instructions.
bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter)
Determines if a vertex is filtered out.
SgAsmBlock * get_entry_block() const
Function entry basic block.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmInstruction * > > InsnGraph
Default instruction-based control flow graph.
void set_successors_complete(bool)
Property: Whether the successors list is complete.
void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi)
Create an instruction control flow graph from a basic block control flow graph.
const T & get_value_or(const Key &key, const T &dflt) const
Convenience for getting a value from an Option.
Definition: Map.h:75
void set_parent(SgNode *parent)
All nodes in the AST contain a reference to a parent node.
List of things to work on.
Definition: WorkLists.h:60
ControlFlowGraph build_insn_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
Represents a synthesized function.
Sawyer::Container::Graph< V, E >::VertexValue get_ast_node(const Sawyer::Container::Graph< V, E > &cfg, size_t vertexId)
Return the AST node associated with a vertex.
ControlFlowGraph build_block_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > flow_order(const ControlFlowGraph &, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start, std::vector< size_t > *reverse_order=NULL)
Orders nodes by depth first search reverse post order.
void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
ControlFlowGraph copy(const ControlFlowGraph &src)
Copies a graph while filtering.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1442
Main namespace for the ROSE library.
void fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
Fix up a CFG by changing function call and return edges.
void clear_ast(SgNode *ast)
Clears successor information from the AST.
T shift()
Remove and return the item from the front of the work list.
Definition: WorkLists.h:232
void set_edge_filter(EdgeFilter *filter)
Manipulate the edge filter.
bool push(const T &, boost::tribool check_uniqueness=boost::logic::indeterminate)
Add an item to the back of the work list.
Definition: WorkLists.h:198
Base class for integer values.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:9451
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst)
Determines if an edge is filtered out.
ControlFlowGraph build_cg_from_ast(SgNode *root)
Builds a control flow graph with only function call edges.
const SgAsmIntegerValuePtrList & get_successors() const
Property: Control flow successors.
void makeRelativeTo(SgNode *baseNode)
Makes the value of this integer relative to some other addressable node.
bool has_instructions() const
Determins if a block contains instructions.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmBlock * > > BlockGraph
Default basic block control flow graph type.
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1489
VertexFilter * get_vertex_filter() const
Manipulate the vertex filter.
Represents one Intel x86 machine instruction.
Extends std::map with methods that return optional values.
Definition: Map.h:10
rose_addr_t get_entry_va() const
Property: Primary entry address.
bool empty() const
Returns true if this work list is empty.
Definition: WorkLists.h:68
const SgAsmStatementPtrList & get_statementList() const
Property: Statements of which this block is composed.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > return_blocks(const ControlFlowGraph &cfg, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start)
Returns list of function return blocks.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter)
Determines if an edge is filtered out.
Binary control flow analysis.
void write_graphviz(std::ostream &, const CFG &, const VertexPropertyWriter &, const EdgePropertyWriter &)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
V VertexValue
User-level data associated with vertices.
Definition: Graph.h:627
const T & get_one(const Key &key) const
Look up one value or throw an exception.
Definition: Map.h:58
rose_addr_t get_address() const
Property: Starting virtual address.
uint64_t get_absoluteValue(size_t nbits=0) const
Returns the current absolute value zero filled to 64 bits.
bool is_vertex_filtered(SgAsmNode *bb_or_insn)
Determines if a vertex is filtered out.
const size_t INVALID_INDEX(-1)
Invalid array index.
void cache_vertex_descriptors(const ControlFlowGraph &)
Cache basic block vertex descriptors in AST.
List of vertices and intra-function edges for one function.
EdgeFilter * get_edge_filter() const
Manipulate the edge filter.
void set_vertex_filter(VertexFilter *filter)
Manipulate the vertex filter.