Partitioner.h

Go to the documentation of this file.
00001 #ifndef ROSE_DISASSEMBLER_PARTITIONER_H
00002 #define ROSE_DISASSEMBLER_PARTITIONER_H
00003 
00004 #include "callbacks.h"
00005 #include "Disassembler.h"
00006 
00111 class Partitioner {
00112     /*************************************************************************************************************************
00113      *                                        Public Exceptions
00114      *************************************************************************************************************************/
00115 public:
00116     struct Exception {
              /* Exception object that carries a single human-readable message string. */
00117         std::string mesg;                                   /**< Message text; a copy of the constructor argument. */
00118         Exception(const std::string &mesg): mesg(mesg) {}   /**< Stores a copy of @p mesg. */
00119         void print(std::ostream &o) const { o <<mesg; }     /**< Writes the message to @p o; no newline is appended. */
00120         friend std::ostream& operator<<(std::ostream &o, const Exception &e); /* declared here; the definition is not in this listing */
00121     };
00122 
00123     /*************************************************************************************************************************
00124      *                                        Data Structures Useful to Subclasses
00125      *************************************************************************************************************************/
00126 protected:
00127 
00128     struct Function;                        /* NOTE(review): forward-declared as `struct` but defined with `class` further down; legal C++, though some compilers warn about the mismatched class-key */
00129     struct DataBlock;                       /* forward declaration; BasicBlock::data_blocks stores DataBlock pointers before DataBlock is defined */
00130     struct BasicBlock;                      /* forward declaration; Instruction::bblock points at a BasicBlock before BasicBlock is defined */
00131 
00135     class Instruction {
              /* Thin wrapper around a disassembled SgAsmInstruction that additionally records a BasicBlock pointer
               * (NULL until something assigns it). */
00136     public:
00137         Instruction(SgAsmInstruction *node)
                  : node(node), bblock(NULL)
              {
                  assert(node!=NULL);             /* a wrapper without an underlying node is never valid */
              }

00138         SgAsmInstruction *node;                 /**< Wrapped instruction node; asserted non-null by the constructor. */
00139         BasicBlock *bblock;                     /**< Associated basic block, or NULL (the value set at construction). */

00141         /* Convenience forwarders: each one simply invokes the same-named method on the wrapped node. */
00142         Disassembler::AddressSet get_successors(bool *complete) const {
                  return node->get_successors(complete);
              }
00143         rose_addr_t get_address() const {
                  return node->get_address();
              }
00144         size_t get_size() const {
                  return node->get_size();
              }
00145         bool terminatesBasicBlock() const {
                  return node->terminatesBasicBlock();
              }
00146         SgUnsignedCharList get_raw_bytes() const { // FIXME: should return const ref?
                  return node->get_raw_bytes();
              }
00147     };
00148 
00149     typedef std::map<rose_addr_t, Instruction*> InstructionMap;     /**< Instruction wrappers keyed by a rose_addr_t (presumably the instruction's address). */
00150     typedef std::vector<Instruction*> InstructionVector;            /**< Ordered sequence of instruction wrapper pointers. */
00151 
00159     static SgAsmInstruction *isSgAsmInstruction(const Instruction *);       /* overload accepting a Partitioner::Instruction wrapper */
00160     static SgAsmInstruction *isSgAsmInstruction(SgNode*);                   /* overload accepting an AST node */
00161     static SgAsmx86Instruction *isSgAsmx86Instruction(const Instruction*);  /* x86 variant for a Partitioner::Instruction wrapper */
00162     static SgAsmx86Instruction *isSgAsmx86Instruction(SgNode*);             /* x86 variant for an AST node */
00169     class BlockAnalysisCache {
              /* Cached analysis results for one basic block. BasicBlock::valid_cache() considers the cache current when
               * `age` equals the block's instruction count. */
00170     public:
00171         BlockAnalysisCache()
                  : age(0),
                    sucs_complete(false),
                    is_function_call(false),
                    call_target(NO_TARGET),
                    function_return(false),
                    alias_for(0)
              {}

              /* Put every field back into the state produced by the default constructor. */
00173         void clear() {
00174             age = 0;
00175             sucs.clear();
00176             sucs_complete = false;
00177             is_function_call = false;
00178             call_target = NO_TARGET;
00179             function_return = false;
00180             alias_for = 0;
00181         }
00182 
00183         size_t age;                             /**< Instruction count at which the cache was validated; 0 after clear(). */
00188         Disassembler::AddressSet sucs;          /**< Cached address set; by its name, the block's successors. */
00189         bool sucs_complete;                     /**< Presumably whether `sucs` is known to be exhaustive -- confirm. */
00190         bool is_function_call;                  /**< Presumably whether the block ends in a call -- confirm. */
00191         rose_addr_t call_target;                /**< NO_TARGET by default; presumably the callee address otherwise. */
00196         bool function_return;                   /**< Presumably whether the block returns from its function -- confirm. */
00199         rose_addr_t alias_for;                  /**< Zero by default; meaning of non-zero values is not shown here. */
00202     };
00203 
00210     struct BasicBlock {
              /* A basic block as tracked by the partitioner: its instructions, attached data blocks, owning function
               * and a cache of analysis results. */
00213         BasicBlock()
                  : reason(SgAsmBlock::BLK_NONE), function(NULL), code_likelihood(1.0)
              {}
00214 
00217         ~BasicBlock() {}
00218 
              /* The cache counts as valid when it was validated at the current instruction count. Note that an empty
               * block whose cache age is zero therefore also compares as valid. */
00220         bool valid_cache() const {
                  return insns.size()==cache.age;
              }
00221 
              /* Mark the cache stale by resetting its age to zero. */
00223         void invalidate_cache() {
                  cache.age = 0;
              }
00224 
              /* Stamp the cache with the current instruction count so valid_cache() returns true. */
00226         void validate_cache() {
                  cache.age = insns.size();
              }
00227 
00230         void clear_data_blocks();
00231 
00232         Instruction* last_insn() const;         /* declared only; by its name, returns the final instruction */
00233         rose_addr_t address() const;            /* Return the address of the basic block's first (entry) instruction. */
00234         unsigned reason;                        /**< Starts as SgAsmBlock::BLK_NONE. */
00235         std::vector<Instruction*> insns;        /**< Instructions belonging to this block. */
00236         std::set<DataBlock*> data_blocks;       /**< Data blocks associated with this block. */
00237         BlockAnalysisCache cache;               /**< Cached analysis results; see valid_cache(). */
00238         Function* function;                     /**< Associated function, or NULL (the initial value). */
00239         double code_likelihood;                 /**< Starts at 1.0. */
00240     };
00241     typedef std::map<rose_addr_t, BasicBlock*> BasicBlocks;
00242 
00255     struct DataBlock {
              /* A block of static data nodes, optionally tied to a function and/or a basic block. */
00259         DataBlock(): reason(SgAsmBlock::BLK_NONE), function(NULL), basic_block(NULL) {}
00260 
00263         ~DataBlock() {}
00264 
00265         rose_addr_t address() const;            /* Return the address of the first node of a data block. */
00266         std::vector<SgAsmStaticData*> nodes;    /**< Static data nodes that make up this block. */
00267         unsigned reason;                        /**< Starts as SgAsmBlock::BLK_NONE. */
00268         Function *function;                     /**< Associated function, or NULL (the initial value). */
00269         BasicBlock *basic_block;                /**< Associated basic block, or NULL (the initial value). */
00270     };
00271     typedef std::map<rose_addr_t, DataBlock*> DataBlocks;   /**< Data blocks keyed by a rose_addr_t. */
00272 
00274     class Function {
              /* A function as tracked by the partitioner: entry address, reason bits, optional name, the basic and data
               * blocks assigned to it, and a two-slot "may return" property (current value plus last committed value). */
00275     public:
              /* All three constructors mark the function as pending and start both may-return slots at RET_UNKNOWN; they
               * differ only in whether reason bits and a name are supplied. */
00276         Function(rose_addr_t entry_va)
00277             : reason(0), pending(true), entry_va(entry_va),
00278               may_return_cur(SgAsmFunction::RET_UNKNOWN), may_return_old(SgAsmFunction::RET_UNKNOWN) {}
00279         Function(rose_addr_t entry_va, unsigned r)
00280             : reason(r), pending(true), entry_va(entry_va),
00281               may_return_cur(SgAsmFunction::RET_UNKNOWN), may_return_old(SgAsmFunction::RET_UNKNOWN) {}
00282         Function(rose_addr_t entry_va, unsigned r, const std::string& name)
00283             : reason(r), name(name), pending(true), entry_va(entry_va),
00284               may_return_cur(SgAsmFunction::RET_UNKNOWN), may_return_old(SgAsmFunction::RET_UNKNOWN) {}
00285 
00287         void clear_basic_blocks();
00288 
00290         void clear_data_blocks();
00291 
00293         void move_basic_blocks_from(Function *other);
00294 
00296         void move_data_blocks_from(Function *other);
00297 
              /* May-return property: set_may_return() changes only the current value; changed_may_return() reports whether
               * it differs from the last committed value; commit_may_return() copies current into committed. */
00304         SgAsmFunction::MayReturn get_may_return() const { return may_return_cur; }
00305         void set_may_return(SgAsmFunction::MayReturn may_return) { may_return_cur = may_return; }
00306         bool changed_may_return() const { return may_return_cur != may_return_old; }
00307         void commit_may_return() { may_return_old = may_return_cur; }
              /* True when the current may-return value is RET_SOMETIMES or RET_ALWAYS. */
00311         bool possible_may_return() const {
00312             return SgAsmFunction::RET_SOMETIMES==get_may_return() || SgAsmFunction::RET_ALWAYS==get_may_return();
00313         }
00314 
00318         void promote_may_return(SgAsmFunction::MayReturn new_value);
00319 
00323         Function *init_properties(const Function &other);
00324 
00326         void show_properties(FILE*) const;
00327 
00328     public:
00329         /* If you add more data members, also update detach_thunk() and/or init_properties() */
00330         unsigned reason;                        /**< Reason bits; 0 when not supplied to the constructor. */
00331         std::string name;                       /**< Function name; empty unless supplied to the constructor. */
00332         BasicBlocks basic_blocks;               /**< Basic blocks of this function, keyed by rose_addr_t. */
00333         DataBlocks data_blocks;                 /**< Data blocks of this function, keyed by rose_addr_t. */
00334         bool pending;                           /**< Set to true by every constructor. */
00335         rose_addr_t entry_va;                   /**< Entry address given to the constructor. */
00336         Disassembler::AddressSet heads;         /**< Address set; its exact role is not shown in this listing. */
00338     private:
00339         /* If you add more data members, also update detach_thunk() and/or init_properties() */
00340         SgAsmFunction::MayReturn may_return_cur; /**< Current may-return value. */
00341         SgAsmFunction::MayReturn may_return_old; /**< Value as of the last commit_may_return(). */
00342     };
00343     typedef std::map<rose_addr_t, Function*> Functions;     /**< Functions keyed by a rose_addr_t. */
00344 
00346     typedef void (*FunctionDetector)(Partitioner*, SgAsmGenericHeader*);    /**< Signature of the user callbacks registered via add_function_detector(). */
00347 
00349     struct BlockConfig {
              /* Per-block configuration record; all scalar fields start at zero/false. NOTE(review): presumably populated
               * by load_config() -- confirm against the implementation. */
00350         BlockConfig(): ninsns(0), alias_for(0), sucs_specified(false), sucs_complete(false) {}
00351         size_t ninsns;                          /**< Instruction count; 0 by default. */
00352         rose_addr_t alias_for;                  /**< 0 by default. */
00353         bool sucs_specified;                    /**< false by default; presumably whether `sucs` was supplied. */
00354         Disassembler::AddressSet sucs;          /**< Address set; by its name, configured successors. */
00355         bool sucs_complete;                     /**< false by default. */
00356         SgUnsignedCharList sucs_program;        /**< Byte list; its interpretation is not shown in this listing. */
00357     };
00358     typedef std::map<rose_addr_t, BlockConfig*> BlockConfigMap;     /**< Block configurations keyed by a rose_addr_t. */
00359 
00360     /*************************************************************************************************************************
00361      *                                                     Deprecated
00362      *************************************************************************************************************************/
00363 public:
00364 
00365     /* FIXME: Backward compatibility stuff prior to 2010-01-01. These are deprecated and should eventually be removed. They
00366      *        are currently used by src/midend/binaryAnalyses/binary_analysis.C for some of the CFG and call graph functions. */
00367 
00374     typedef std::map<rose_addr_t, Disassembler::AddressSet> BasicBlockStarts;   /**< Maps an address to a set of addresses (part of the deprecated API). */
00375 
00379     BasicBlockStarts detectBasicBlocks(const Disassembler::InstructionMap&) const __attribute__((deprecated));  /* deprecated; the attribute is GCC-style syntax */
00380 
00385     struct FunctionStart {
              /* Reason bits plus a name, stored as the mapped value of FunctionStarts (part of the deprecated API).
               * NOTE(review): `name` is taken by value and then copied again into the member; a const reference
               * would avoid one copy. */
00386         FunctionStart(unsigned reason, std::string name): reason(reason), name(name) {}
00387         unsigned reason;                        /**< Reason bits given to the constructor. */
00388         std::string name;                       /**< Name given to the constructor. */
00389     };
00390 
00395     typedef std::map<rose_addr_t, FunctionStart> FunctionStarts;    /**< FunctionStart records keyed by a rose_addr_t. */
00396 
00400     FunctionStarts detectFunctions(SgAsmInterpretation*, const Disassembler::InstructionMap &insns,
00401                                    BasicBlockStarts &bb_starts/*out*/) const __attribute__((deprecated)); /* deprecated; bb_starts is an output argument */
00402 
00403 
00404     /*************************************************************************************************************************
00405      *                                                 Constructors, etc.
00406      *************************************************************************************************************************/
00407 public:
00408 
00409     Partitioner()
              /* Starts with no statistics, criteria, disassembler, map or debug stream (all NULL), the default function
               * search heuristics, and discontiguous blocks allowed. */
00410         : aggregate_mean(NULL), aggregate_variance(NULL), code_criteria(NULL), disassembler(NULL), map(NULL),
00411           func_heuristics(SgAsmFunction::FUNC_DEFAULT), debug(NULL), allow_discont_blocks(true)
00412         {}
00413     virtual ~Partitioner() { clear(); }     /* the destructor only calls clear(); inside a destructor that virtual call resolves to Partitioner::clear() */
00414 
00415     /*************************************************************************************************************************
00416      *                                              Accessors for Properties
00417      *************************************************************************************************************************/
00418 public:
00419 
00423     virtual void set_search(unsigned heuristics) {
              /* Replace the function-search heuristic bits (the constructor default is SgAsmFunction::FUNC_DEFAULT). */
00424         func_heuristics = heuristics;
00425     }
00426 
00429     virtual unsigned get_search() const {
              /* Current function-search heuristic bits. */
00430         return func_heuristics;
00431     }
00432 
00462     void set_allow_discontiguous_blocks(bool b) {
              /* Set the discontiguous-blocks flag (true by default, per the constructor). */
00463         allow_discont_blocks = b;
00464     }
00465 
00467     bool get_allow_discontiguous_blocks() const {
              /* Current value of the discontiguous-blocks flag. */
00468         return allow_discont_blocks;
00469     }
00470 
00472     void set_debug(FILE *f) {
              /* Store the debug stream pointer; NULL is the constructor default. The pointer is only stored here. */
00473         debug = f;
00474     }
00475 
00477     FILE *get_debug() const {
              /* Debug stream pointer most recently stored, or NULL. */
00478         return debug;
00479     }
00480 
00497     void set_map(MemoryMap *mmap, MemoryMap *ro_mmap=NULL);     /* declared only; the second map is optional and defaults to NULL */
00498     MemoryMap *get_map() const {
              /* Returns the `map` member (NULL after construction). */
00499         return map;
00500     }
00506     void set_progress_reporting(FILE*, unsigned min_interval);  /* declared only; the unit of min_interval is not shown here */
00507 
00508     /*************************************************************************************************************************
00509      *                                                High-level Functions
00510      *************************************************************************************************************************/
00511 public:
00512 
00519     void add_function_detector(FunctionDetector f) {
              /* Append a user-supplied detector callback to the end of user_detectors. No NULL check or duplicate
               * check is performed here. */
00520         user_detectors.push_back(f);
00521     }
00522 
00531     static unsigned parse_switches(const std::string&, unsigned initial_flags);     /* declared only; returns flag bits derived from a string and an initial flag set */
00532 
          /* Two partition() overloads: one takes an already-disassembled instruction map plus an optional memory map, the
           * other takes a disassembler and a memory map. Both return an SgAsmBlock pointer. */
00537     virtual SgAsmBlock* partition(SgAsmInterpretation*, const Disassembler::InstructionMap&, MemoryMap *mmap=NULL);
00538 
00540     virtual SgAsmBlock* partition(SgAsmInterpretation*, Disassembler*, MemoryMap*);
00541 
          /* Also invoked by the destructor. */
00544     virtual void clear();
00545 
00548     virtual void load_config(const std::string &filename);
00549 
00552     virtual void add_instructions(const Disassembler::InstructionMap& insns);
00553 
          /* Returns the map by value, so callers receive their own copy. */
00556     Disassembler::InstructionMap get_instructions() const;
00557 
00560     const Disassembler::BadMap& get_disassembler_errors() const {
              /* Read-only reference to the bad_insns member; the reference stays tied to this partitioner's lifetime. */
00561         return bad_insns;
00562     }
00563 
00568     void clear_disassembler_errors() {
              /* Empty the bad_insns container. */
00569         bad_insns.clear();
00570     }
00571 
00576     virtual Instruction* find_instruction(rose_addr_t, bool create=true);   /* declared only; `create` defaults to true */
00577 
00584     virtual Instruction* discard(Instruction*, bool discard_entire_block=false);    /* declared only; by default only the given instruction is affected, not its whole block (per the parameter name) */
00585 
00589     virtual BasicBlock *discard(BasicBlock*);
00590 
          /* NOTE(review): `name` is passed by value; changing it would alter a virtual signature, so it is left as is. */
00594     virtual Function* add_function(rose_addr_t entry_va, unsigned reasons, std::string name="");
00595 
00597     virtual Function* find_function(rose_addr_t entry_va);
00598 
00611     virtual SgAsmBlock* build_ast(SgAsmInterpretation *interp=NULL);
00612 
00616     virtual void fixup_cfg_edges(SgNode *ast);
00617 
00630     virtual void fixup_pointers(SgNode *ast, SgAsmInterpretation *interp=NULL);
00631 
00632     /**************************************************************************************************************************
00633      *                                  Range maps relating address ranges to objects
00634      **************************************************************************************************************************/
00635 public:
00637     class FunctionRangeMapValue: public RangeMapValue<Extent, Function*> {
              /* RangeMap value type that stores a (possibly NULL) Function pointer for an address extent. */
00638     public:
00639         FunctionRangeMapValue()
                  : RangeMapValue<Extent, Function*>(NULL) {}
00640         FunctionRangeMapValue(Function *f) // implicit
                  : RangeMapValue<Extent, Function*>(f) {}
00641 
              /* Splitting leaves the stored pointer untouched: a copy of this value is returned. The split point must lie
               * inside my_range (checked by assertion only). */
00642         FunctionRangeMapValue split(const Extent &my_range, const Extent::Value &new_end) {
                  assert(my_range.contains(Extent(new_end)));
                  return *this;
              }
00646 
              /* Prints "F" followed by the function's entry address, or "(null)" when no function is stored. */
00647         void print(std::ostream &o) const {
                  if (value!=NULL) {
                      o <<"F" <<StringUtility::addrToString(value->entry_va);
                  } else {
                      o <<"(null)";
                  }
              }
00654     };
00655 
00657     typedef RangeMap<Extent, FunctionRangeMapValue> FunctionRangeMap;
00658 
00660     class DataRangeMapValue: public RangeMapValue<Extent, DataBlock*> {
              /* RangeMap value type that stores a (possibly NULL) DataBlock pointer for an address extent. */
00661     public:
00662         DataRangeMapValue()
                  : RangeMapValue<Extent, DataBlock*>(NULL) {}
00663         DataRangeMapValue(DataBlock *d) // implicit
                  : RangeMapValue<Extent, DataBlock*>(d) {}
00664 
              /* Splitting leaves the stored pointer untouched: a copy of this value is returned. The split point must lie
               * inside my_range (checked by assertion only). */
00665         DataRangeMapValue split(const Extent &my_range, const Extent::Value &new_end) {
                  assert(my_range.contains(Extent(new_end)));
                  return *this;
              }
00669 
              /* Prints "D" followed by the data block's address, or "(null)" when no block is stored. */
00670         void print(std::ostream &o) const {
                  if (value!=NULL) {
                      o <<"D" <<StringUtility::addrToString(value->address());
                  } else {
                      o <<"(null)";
                  }
              }
00677     };
00678 
00680     typedef RangeMap<Extent, DataRangeMapValue> DataRangeMap;
00681 
00682     /**************************************************************************************************************************
00683      *                                  Methods for characterizing whether something is code
00684      **************************************************************************************************************************/
00685 public:
00686 
00701     class RegionStats {
              /* Holds one accumulated result (sum and sample count) per analysis. The set of known analyses lives in a
               * class-wide static dictionary that every constructor initialises via init_class(). */
00702     private:
00703         struct DictionaryEntry {
                  /* Name, description and weight of one analysis. */
00704             DictionaryEntry(): weight(0.0) {}
00705             DictionaryEntry(const std::string &name, const std::string &desc, double weight)
00706                 : name(name), desc(desc), weight(weight) {}
00707             std::string name;
00708             std::string desc;
00709             double weight;                                      /**< 0.0 for a default-constructed entry. */
00710         };
00711 
00712         struct AnalysisResult {
                  /* Running sum and sample count; constructing from a single double records it as one sample. */
00713             AnalysisResult(): sum(0), nsamples(0) {}
00714             AnalysisResult(double d): sum(d), nsamples(1) {} // implicit
00715             double sum;
00716             size_t nsamples;
00717         };
00718 
00719         static std::vector<DictionaryEntry> dictionary;     /* shared by all RegionStats objects */
00720         std::vector<AnalysisResult> results;                /* per-object results; presumably indexed by analysis id */
00721 
00722     public:
              /* Identifiers of the predefined analyses, numbered consecutively from zero. */
00725         enum AnalysisEnum {
00726             RA_NBYTES=0, RA_NINSNS, RA_NCOVERAGE, RA_RCOVERAGE, RA_NSTARTS, RA_NFAILS, RA_RFAILS, RA_NOVERLAPS, RA_ROVERLAPS,
00727             RA_NINCOMPLETE, RA_RINCOMPLETE, RA_NBRANCHES, RA_RBRANCHES, RA_NCALLS, RA_RCALLS, RA_NNONCALLS, RA_RNONCALLS,
00728             RA_NINTERNAL, RA_RINTERNAL, RA_NICFGEDGES, RA_RICFGEDGES, RA_NCOMPS, RA_RCOMPS, RA_NIUNIQUE, RA_RIUNIQUE,
00729             RA_NREGREFS, RA_RREGREFS, RA_REGSZ, RA_REGVAR, RA_NPRIV, RA_RPRIV, RA_NFLOAT, RA_RFLOAT
00730         };
00731 
00732         RegionStats() { init_class(); }
00733         virtual ~RegionStats() {}
00734         virtual RegionStats* create() const;                    /* declared only; by its name, a virtual factory */
              /* Class-wide dictionary operations. NOTE(review): the default id of (size_t)(-1) in define_analysis()
               * presumably means "pick an id automatically" -- confirm against the implementation. */
00737         static size_t define_analysis(const std::string &name, const std::string &desc, double weight, size_t id=(size_t)(-1));
00738         static size_t find_analysis(const std::string &name);   
00739         static size_t get_nanalyses();                          
00740         static const std::string& get_name(size_t id);          
00741         static const std::string& get_desc(size_t id);          
00742         static double get_weight(size_t id);                    
              /* Per-object accessors; `id` selects the analysis. */
00744         virtual void add_sample(size_t id, double val, size_t nsamples=1);  
00745         virtual size_t get_nsamples(size_t id) const;           
00746         virtual double get_sum(size_t id) const;                
00747         virtual double get_value(size_t id) const;              
00748         virtual void compute_ratios();                          
00749         virtual void set_value(size_t id, double val);          
00751         double divnan(size_t num_id, size_t den_id) const;      /* declared only; takes a numerator id and a denominator id */
00752         void add_samples(const RegionStats*);                   
00753         void square_diff(const RegionStats*);                   
00755         virtual void print(std::ostream&) const;
00756         friend std::ostream& operator<<(std::ostream&, const RegionStats&);
00757     protected:
00758         static void init_class();               /* called from the constructor; defined outside this listing */
00759 
00760     };
00761 
00825     class CodeCriteria {
              /* A set of criteria (mean, variance and weight per criterion) plus a threshold, used to vote on whether a
               * RegionStats sample satisfies the criteria. Criterion names live in a class-wide static dictionary. */
00826     private:
00827         struct DictionaryEntry {
                  /* Name and description of one criterion. */
00828             DictionaryEntry() {}
00829             DictionaryEntry(const std::string &name, const std::string &desc): name(name), desc(desc) {}
00830             std::string name;
00831             std::string desc;
00832         };
00833 
00834         struct Criterion {
                  /* Per-criterion parameters; all start at 0.0. */
00835             Criterion(): mean(0.0), variance(0.0), weight(0.0) {}
00836             double mean;
00837             double variance;
00838             double weight;
00839         };
00840 
00841         static std::vector<DictionaryEntry> dictionary;     /* shared by all CodeCriteria objects */
00842         std::vector<Criterion> criteria;                    /* per-object parameters; presumably indexed by criterion id */
00843         double threshold;                                   /* 0.5 for a default-constructed object */
00844 
00845     public:
00846         CodeCriteria(): threshold(0.5) { init_class(); }
              /* NOTE(review): `threshold` is not in this constructor's initialiser list, so it holds whatever init()
               * assigns -- confirm init() always sets it. Because init() is called from a constructor, the call
               * resolves to CodeCriteria::init() even if a subclass overrides it. */
00847         CodeCriteria(const RegionStats *mean, const RegionStats *variance, double threshold) {
00848             init_class();
00849             init(mean, variance, threshold);
00850         }
00851         virtual ~CodeCriteria() {}
00852         virtual CodeCriteria* create() const;
00853 
              /* Class-wide dictionary operations. NOTE(review): the default id of (size_t)(-1) presumably means "pick
               * an id automatically" -- confirm against the implementation. */
00854         static size_t define_criterion(const std::string &name, const std::string &desc, size_t id=(size_t)(-1));
00855         static size_t find_criterion(const std::string &name);
00856         static size_t get_ncriteria();
00857         static const std::string& get_name(size_t id);
00858         static const std::string& get_desc(size_t id);
00859 
00860         virtual double get_mean(size_t id) const;
00861         virtual void set_mean(size_t id, double mean);
00862         virtual double get_variance(size_t id) const;
00863         virtual void set_variance(size_t id, double variance);
00864         virtual double get_weight(size_t id) const;
00865         virtual void set_weight(size_t id, double weight);
              /* Sets mean, variance and weight for one criterion by calling the three virtual setters in that order. */
00866         void set_value(size_t id, double mean, double variance, double weight) {
00867             set_mean(id, mean);
00868             set_variance(id, variance);
00869             set_weight(id, weight);
00870         }
00871 
00872         double get_threshold() const { return threshold; }
00873         void set_threshold(double th) { threshold=th; }
00874         virtual double get_vote(const RegionStats*, std::vector<double> *votes=NULL) const;
00875         virtual bool satisfied_by(const RegionStats*, double *raw_vote_ptr=NULL, std::ostream *debug=NULL) const;
00876 
00877         virtual void print(std::ostream&, const RegionStats *stats=NULL, const std::vector<double> *votes=NULL,
00878                            const double *total_vote=NULL) const;
00879         friend std::ostream& operator<<(std::ostream&, const CodeCriteria&);
00880 
00881     protected:
00882         static void init_class();               /* called from every constructor; defined outside this listing */
00883         virtual void init(const RegionStats *mean, const RegionStats *variance, double threshold);
00884         
00885     };
00886 
00890     virtual RegionStats *new_region_stats() {
              /* Factory hook: returns a heap-allocated, default-constructed RegionStats. The returned raw pointer is not
               * tracked here, so the caller is responsible for it. */
00891         return new RegionStats;
00892     }
00893 
00897     virtual CodeCriteria *new_code_criteria() {
              /* Factory hook: returns a heap-allocated, default-constructed CodeCriteria (caller-owned raw pointer). */
00898         return new CodeCriteria;
00899     }
00900     virtual CodeCriteria *new_code_criteria(const RegionStats *mean, const RegionStats *variance, double threshold) {
              /* Factory hook: same as above, but forwards the three arguments to the CodeCriteria constructor. */
00901         return new CodeCriteria(mean, variance, threshold);
00902     }
00909     virtual RegionStats *region_statistics(const ExtentMap&);   /* statistics for an explicit set of extents; ownership of the result is not shown here */
00910     virtual RegionStats *region_statistics(Function*);          /* overload taking a function */
00911     virtual RegionStats *region_statistics();                   /* overload taking no region argument */
00919     virtual RegionStats *aggregate_statistics(bool do_variance=true);   /* declared only; do_variance defaults to true */
00920 
00929     virtual RegionStats *get_aggregate_mean() const { return aggregate_mean; }          /* may be NULL (constructor default, and after clear_aggregate_statistics()) */
00930     virtual RegionStats *get_aggregate_variance() const { return aggregate_variance; }  /* may be NULL (constructor default, and after clear_aggregate_statistics()) */
00936     virtual void clear_aggregate_statistics() {
              /* Free both aggregate objects and null the pointers so that a later call (or the getters) sees a clean
               * "no statistics" state; deleting NULL is a no-op, so this is safe to call repeatedly. */
              delete aggregate_mean;
              aggregate_mean = NULL;

              delete aggregate_variance;
              aggregate_variance = NULL;
00939     }
00940 
00943     virtual size_t count_kinds(const InstructionMap&);
00944     virtual size_t count_kinds() {
              /* Convenience overload operating on the partitioner's own `insns` container. It previously forwarded to
               * count_privileged(insns), which is a different metric; it now forwards to the matching
               * count_kinds(const InstructionMap&) overload, like the other count_*() convenience overloads do. */
              return count_kinds(insns);
          }
00949     virtual size_t count_privileged(const InstructionMap&);
00950     virtual size_t count_privileged() {
              /* Same count, taken over the partitioner's own `insns` container. */
              return count_privileged(insns);
          }
00951     virtual double ratio_privileged() {
              /* With no instructions there is no meaningful ratio, so report NaN instead of dividing by zero. */
              if (insns.empty())
                  return NAN;
              return (double)count_privileged(insns) / insns.size();
          }
00956     virtual size_t count_floating_point(const InstructionMap&);
00957     virtual size_t count_floating_point() {
              /* Same count, taken over the partitioner's own `insns` container. */
              return count_floating_point(insns);
          }
00958     virtual double ratio_floating_point() {
              /* With no instructions there is no meaningful ratio, so report NaN instead of dividing by zero. */
              if (insns.empty())
                  return NAN;
              return (double)count_floating_point(insns) / insns.size();
          }
00967     virtual size_t count_registers(const InstructionMap&, double *mean=NULL, double *variance=NULL);
00968     virtual size_t count_registers(double *mean=NULL, double *variance=NULL) {
              /* Same count, taken over the partitioner's own `insns` container; the optional outputs are passed through. */
              return count_registers(insns, mean, variance);
          }
00969     virtual double ratio_registers(double *mean=NULL, double *variance=NULL) {
              /* With no instructions the result is NaN and count_registers() is never called, so *mean and *variance
               * are left untouched in that case. */
              if (insns.empty())
                  return NAN;
              return (double)count_registers(mean, variance) / insns.size();
00971     }
00979     virtual double count_size_variance(const InstructionMap &insns);                /* the parameter shadows the `insns` member used by the overload below */
00980     virtual double count_size_variance() { return count_size_variance(insns); }     /* convenience overload over the partitioner's own `insns` container */
00994     virtual bool is_code(const ExtentMap &region, double *raw_vote_ptr=NULL, std::ostream *debug=NULL);     /* declared only; both pointer arguments are optional */
00995 
01002     virtual CodeCriteria *get_code_criteria() const { return code_criteria; }   /* may be NULL (constructor default) */
01003     virtual void set_code_criteria(CodeCriteria *cc) { code_criteria = cc; }    /* overwrites the pointer only; any previously stored object is not deleted here */
01006 protected:
01007     RegionStats *aggregate_mean;                /**< NULL until set; deleted by clear_aggregate_statistics(). */
01008     RegionStats *aggregate_variance;            /**< NULL until set; deleted by clear_aggregate_statistics(). */
01009     CodeCriteria *code_criteria;                /**< NULL until set_code_criteria() stores a pointer. */
01011     /**************************************************************************************************************************
01012      *                                  Functions for scanning through memory
01013      **************************************************************************************************************************/
01014 public:
01015 
01017     class InsnRangeCallback {
              /* Abstract callback interface for the instruction-range scanners (scan_*_insns). */
01018     public:
01020         struct Args {
                  /* Argument bundle handed to operator(); the constructor simply stores its parameters. */
01021             Args(Partitioner *partitioner, Instruction *insn_prev, Instruction *insn_begin,
01022                  Instruction *insn_end, size_t ninsns)
01023                 : partitioner(partitioner), insn_prev(insn_prev), insn_begin(insn_begin), insn_end(insn_end),
01024                   ninsns(ninsns) {}
01025             Partitioner *partitioner;
01026             Instruction *insn_prev;                     /**< Presumably the instruction preceding the range -- confirm. */
01027             Instruction *insn_begin;                    /**< Presumably the first instruction of the range -- confirm. */
01028             Instruction *insn_end;                      /**< Presumably the end of the range; inclusive or exclusive is not shown here. */
01029             size_t ninsns;                              /**< Presumably the number of instructions in the range -- confirm. */
01030         };
01031 
01032         virtual ~InsnRangeCallback() {}
01033 
01035         virtual bool operator()(bool enabled, const Args &args) = 0;    /* pure virtual: subclasses supply the behaviour */
01036     };
01037     typedef ROSE_Callbacks::List<InsnRangeCallback> InsnRangeCallbacks;
01038 
01040     class ByteRangeCallback {
              /* Abstract callback interface for the byte-range scanners (scan_*_bytes). */
01041     public:
01043         struct Args {
                  /* Argument bundle handed to operator(). `ranges` is held by reference, so an Args object must not
                   * outlive the FunctionRangeMap it was built from. */
01044             Args(Partitioner *partitioner, MemoryMap *restrict_map, const FunctionRangeMap &ranges, const Extent &range)
01045                 : partitioner(partitioner), restrict_map(restrict_map), ranges(ranges), range(range) {}
01046             Partitioner *partitioner;
01047             MemoryMap *restrict_map;                    /**< Optional map; the scanners default it to NULL. */
01048             const FunctionRangeMap &ranges;             /**< Reference to a caller-owned function range map. */
01049             Extent range;                               /**< Stored by value. */
01050         };
01051 
01052         virtual ~ByteRangeCallback() {}
01053 
01055         virtual bool operator()(bool enabled, const Args &args) = 0;    /* pure virtual: subclasses supply the behaviour */
01056     };
01057     typedef ROSE_Callbacks::List<ByteRangeCallback> ByteRangeCallbacks;
01058 
01069     virtual void scan_contiguous_insns(InstructionMap insns, InsnRangeCallbacks &cblist,
01070                                        Instruction *insn_prev, Instruction *insn_end);
01071     void scan_contiguous_insns(const InstructionMap &insns, InsnRangeCallback *callback,
01072                                Instruction *insn_prev, Instruction *insn_end) {
              /* Single-callback convenience overload: wrap the callback in a one-element list and defer to the
               * list-based virtual overload. Note that the virtual overload takes the map by value, so the map is
               * copied on this call. */
              InsnRangeCallbacks single_cb(callback);
              scan_contiguous_insns(insns, single_cb, insn_prev, insn_end);
01075     }
01094     virtual void scan_unassigned_insns(InsnRangeCallbacks &callbacks);
01095     void scan_unassigned_insns(InsnRangeCallback *callback) {
              /* Single-callback convenience overload: wrap the callback in a list and defer to the virtual overload. */
              InsnRangeCallbacks single_cb(callback);
              scan_unassigned_insns(single_cb);
01098     }
01110     virtual void scan_intrafunc_insns(InsnRangeCallbacks &callbacks);
01111     void scan_intrafunc_insns(InsnRangeCallback *callback) {
              /* Single-callback convenience overload: wrap the callback in a list and defer to the virtual overload. */
              InsnRangeCallbacks single_cb(callback);
              scan_intrafunc_insns(single_cb);
01114     }
01131     virtual void scan_interfunc_insns(InsnRangeCallbacks &callbacks);
01132     void scan_interfunc_insns(InsnRangeCallback *callback) {
              /* Single-callback convenience overload: wrap the callback in a list and defer to the virtual overload. */
              InsnRangeCallbacks single_cb(callback);
              scan_interfunc_insns(single_cb);
01135     }
01147     virtual void scan_unassigned_bytes(ByteRangeCallbacks &callbacks, MemoryMap *restrict_map=NULL);
01148     void scan_unassigned_bytes(ByteRangeCallback *callback, MemoryMap *restrict_map=NULL) {
              /* Single-callback convenience overload: wrap the callback in a list and defer to the virtual overload,
               * passing the optional restriction map through unchanged. */
              ByteRangeCallbacks single_cb(callback);
              scan_unassigned_bytes(single_cb, restrict_map);
01151     }
01162     virtual void scan_intrafunc_bytes(ByteRangeCallbacks &callbacks, MemoryMap *restrict_map=NULL);
01163     void scan_intrafunc_bytes(ByteRangeCallback *callback, MemoryMap *restrict_map=NULL) {
              /* Single-callback convenience overload: wrap the callback in a list and defer to the virtual overload,
               * passing the optional restriction map through unchanged. */
              ByteRangeCallbacks single_cb(callback);
              scan_intrafunc_bytes(single_cb, restrict_map);
01166     }
01177     virtual void scan_interfunc_bytes(ByteRangeCallbacks &callbacks, MemoryMap *restrict_map=NULL);
01178     void scan_interfunc_bytes(ByteRangeCallback *callback, MemoryMap *restrict_map=NULL) {
              /* Single-callback convenience overload: wrap the callback in a list and defer to the virtual overload,
               * passing the optional restriction map through unchanged. */
              ByteRangeCallbacks single_cb(callback);
              scan_interfunc_bytes(single_cb, restrict_map);
01181     }
01193     struct FindDataPadding: public ByteRangeCallback {
              /* Byte-range callback configured by a list of byte patterns and repetition limits; operator() is defined
               * outside this listing. Field meanings below are inferred from names and defaults -- confirm. */
01194         std::vector<SgUnsignedCharList> patterns;       /**< Byte patterns; empty by default. */
01195         size_t minimum_nrep;                            /**< Default 2. */
01196         size_t maximum_nrep;                            /**< Default 1024*1024. */
01197         bool begins_contiguously;                       /**< Default false. */
01198         bool ends_contiguously;                         /**< Default true. */
01199         rose_addr_t maximum_range_size;                 /**< Default 100*1024*1024. */
01200         size_t nfound;                                  /**< Counter, zero at construction. */
01202         FindDataPadding()
01203             : minimum_nrep(2), maximum_nrep(1024*1024), begins_contiguously(false), ends_contiguously(true),
01204               maximum_range_size(100*1024*1024),  nfound(0) {}
01205         virtual bool operator()(bool enabled, const Args &args);
01206     };
01207 
01212     struct FindData: public ByteRangeCallback {
              /* Byte-range callback; operator() is defined outside this listing.
               * NOTE(review): the destructor deletes padding_ranges but copying is not disabled, so copying an instance
               * whose padding_ranges is non-NULL would lead to a double delete. */
01213         unsigned excluded_reasons;                      /**< Default FUNC_PADDING|FUNC_THUNK. */
01214         DataRangeMap *padding_ranges;                   /**< NULL at construction; deleted by the destructor. */
01215         size_t nfound;                                  /**< Counter, zero at construction. */
01217         FindData(): excluded_reasons(SgAsmFunction::FUNC_PADDING|SgAsmFunction::FUNC_THUNK), padding_ranges(NULL), nfound(0) {}
01218         ~FindData() { delete padding_ranges; }
01219         virtual bool operator()(bool enabled, const Args &args);
01220     };
01221 
01263     struct FindInsnPadding: public InsnRangeCallback {
              /* Instruction-range callback configured by x86 instruction kinds and/or byte patterns; operator() is defined
               * outside this listing. Field meanings below are inferred from names and defaults -- confirm. */
01264         std::set<X86InstructionKind> x86_kinds;                 /**< Empty by default. */
01265         std::vector<SgUnsignedCharList> byte_patterns;          /**< Empty by default. */
01266         bool begins_contiguously;                               /**< Default true. */
01267         bool ends_contiguously;                                 /**< Default true. */
01268         size_t minimum_size;                                    /**< Default 0. */
01269         bool add_as_data;                                       /**< Default true. */
01270         size_t nfound;                                          /**< Counter, zero at construction. */
01272         FindInsnPadding()
01273             : begins_contiguously(true), ends_contiguously(true), minimum_size(0), add_as_data(true), nfound(0) {}
01274         virtual bool operator()(bool enabled, const Args &args);
01275     };
01276 
01299     struct FindFunctionFragments: public ByteRangeCallback {
              /* Byte-range callback; operator() is defined outside this listing.
               * NOTE(review): the destructor deletes function_extents and code_criteria but copying is not disabled, so
               * copying an instance with either pointer set would lead to a double delete. */
01300         bool require_noninterleaved;                            /**< Default true. */
01301         bool require_intrafunction;                             /**< Default false. */
01302         double threshold;                                       /**< Default 0.7. */
01303         unsigned excluded_reasons;                              /**< Default FUNC_PADDING|FUNC_THUNK. */
01304         size_t nfound;                                          /**< Counter, zero at construction. */
01306         FunctionRangeMap *function_extents;                     /**< NULL at construction; deleted by the destructor. */
01307         CodeCriteria *code_criteria;                            /**< NULL at construction; deleted by the destructor. */
01309         FindFunctionFragments()
01310             : require_noninterleaved(true), require_intrafunction(false), threshold(0.7),
01311               excluded_reasons(SgAsmFunction::FUNC_PADDING|SgAsmFunction::FUNC_THUNK),
01312               nfound(0), function_extents(NULL), code_criteria(NULL)
01313             {}
01314         virtual ~FindFunctionFragments() {
01315             delete function_extents;
01316             delete code_criteria;
01317         }
01318         virtual bool operator()(bool enabled, const Args &args);
01319     };
01320 
01329     struct FindThunks: public InsnRangeCallback {
              /* Instruction-range callback with one on/off switch and a hit counter.  The detection logic lives in
               * operator(), whose body is not in this header. */
01330         bool validate_targets;          /* On/off switch, defaults to true.  Declared bool (it was size_t) because it is
                                               * initialized with true and the same-named switch in FindThunkTables is bool.
                                               * NOTE(review): presumably controls whether thunk targets are checked. */
01331         size_t nfound;                  /* Starts at zero; presumably a count of thunks found -- confirm. */
01333         FindThunks(): validate_targets(true), nfound(0) {}
01334         virtual bool operator()(bool enabled, const Args &args);
01335     };
01336 
01344     struct FindThunkTables: public ByteRangeCallback {
              /* Byte-range callback with two contiguity switches, a minimum thunk count, a target-validation switch, and a
               * hit counter.  The detection logic lives in operator(), whose body is not in this header. */
01345         bool begins_contiguously;       /* Defaults to false. */
01346         bool ends_contiguously;         /* Defaults to false. */
01347         size_t minimum_nthunks;         /* Defaults to 3. */
01348         bool validate_targets;          /* Defaults to true. */
01349         size_t nfound;                  /* Starts at zero; presumably a match count -- confirm. */
01351         FindThunkTables()
01352             : begins_contiguously(false), ends_contiguously(false), minimum_nthunks(3), validate_targets(true), nfound(0) {}
01353         virtual bool operator()(bool enabled, const Args &args);
01354     };
01355 
01359     struct FindInterPadFunctions: public ByteRangeCallback {
              /* Byte-range callback holding a map of padding ranges and a hit counter.  The detection logic lives in
               * operator(), whose body is not in this header.
               *
               * Ownership: padding_ranges is deleted by the destructor.  No copy constructor or assignment operator is
               * declared here, so unless the base class forbids copying, a copy would share the pointer and delete it
               * twice.  Do not copy an instance whose padding_ranges is non-NULL. */
01360         DataRangeMap *padding_ranges;   /* Starts NULL; deleted by the destructor. */
01361         size_t nfound;                  /* Starts at zero; presumably a match count -- confirm. */
01363         FindInterPadFunctions(): padding_ranges(NULL), nfound(0) {}
01364         virtual ~FindInterPadFunctions() { delete padding_ranges; } /* Virtual, like FindFunctionFragments. */
01365         virtual bool operator()(bool enabled, const Args &args);
01366     };
01367 
01376     struct FindPostFunctionInsns: public InsnRangeCallback {
              /* Instruction-range callback whose only state is a hit counter.  The detection logic lives in operator(),
               * whose body is not in this header. */
01377         size_t nfound;                  /* Starts at zero; presumably a match count -- confirm. */
01379         FindPostFunctionInsns(): nfound(0) {}
01380         virtual bool operator()(bool enabled, const Args &args);
01381     };
01382 
01383     /**************************************************************************************************************************
01384      *                                  Methods for finding functions by patterns
01385      **************************************************************************************************************************/
01386 protected:
01391     static InstructionMap::const_iterator pattern1(const InstructionMap& insns, InstructionMap::const_iterator first,
01392                                                    Disassembler::AddressSet &exclude);
              /* pattern1 (above): static matcher that reads the instruction map, starts from the iterator `first`, and
               * returns an iterator into the same map type.  `exclude` is a non-const reference, so the callee may modify
               * it.  NOTE(review): the meaning of the returned iterator (match position vs. end-of-map on failure) is not
               * shown in this header -- confirm in the implementation. */
01393 
01394 #if 0 /* Definitions are also commented out */
01395 
              /* pattern2 and pattern3 have the same signature as pattern1 but are compiled out, together with their
               * definitions. */
01397     static InstructionMap::const_iterator pattern2(const InstructionMap& insns, InstructionMap::const_iterator first,
01398                                                    Disassembler::AddressSet &exclude);
01399 
01402     static InstructionMap::const_iterator pattern3(const InstructionMap& insns, InstructionMap::const_iterator first,
01403                                                    Disassembler::AddressSet &exclude);
01404 #endif
01405 
01406     /*************************************************************************************************************************
01407      *                                                 Low-level Functions
01408      *
01409      * These are public because they might need to be called by the partitioner's instruction or address traversal callbacks,
01410      * and it's often convenient to declare those functors outside any Partitioner subclass.
01411      *************************************************************************************************************************/
01412 public:
01413     /* NOTE: Some of these are documented at their implementation because the documentation is more than what conveniently
01414      *       fits here. */
01415     struct AbandonFunctionDiscovery {};                         /* Empty tag type.  NOTE(review): presumably thrown to stop discovery of the current function -- confirm. */
01417     virtual void append(BasicBlock*, Instruction*);             /* NOTE(review): presumably adds an instruction to a basic block -- confirm. */
01418     virtual void append(BasicBlock*, DataBlock*, unsigned reasons); /* Add a data block to a basic block. */
01419     virtual void append(Function*, BasicBlock*, unsigned reasons, bool keep=false); /* Append a basic block to a function */
01420     virtual void append(Function*, DataBlock*, unsigned reasons, bool force=false); /* Append a data block to a function */
01421     virtual void remove(Function*, BasicBlock*);                /* Remove a basic block from a function. */
01422     virtual void remove(Function*, DataBlock*);                 /* Remove a data block from a function. */
01423     virtual void remove(BasicBlock*, DataBlock*);               /* Remove association between basic block and data block. */
01424     virtual BasicBlock* find_bb_containing(rose_addr_t, bool create=true); /* Find basic block containing instruction address; `create` defaults to true */
01425     virtual BasicBlock* find_bb_starting(rose_addr_t, bool create=true);   /* Find or create block starting at specified address; `create` defaults to true */
01426     virtual DataBlock* find_db_starting(rose_addr_t, size_t size); /* Find (or create if size>0) a data block */
01427     virtual Disassembler::AddressSet successors(BasicBlock*, bool *complete=NULL); /* Calculates known successors; `complete` is an optional pointer, NULL by default */
01428     virtual rose_addr_t call_target(BasicBlock*);               /* Returns address if block could be a function call */
01429     virtual void truncate(BasicBlock*, rose_addr_t);            /* Remove instructions from the end of a basic block. */
01430     virtual void discover_first_block(Function*);               /* Adds first basic block to empty function to start discovery. */
01431     virtual void discover_blocks(Function*, unsigned reason);   /* Start to recursively discover blocks of a function. */
01432     virtual void discover_blocks(Function*, rose_addr_t, unsigned reason); /* Recursively discovers blocks of a function. */
01433     virtual void pre_cfg(SgAsmInterpretation *interp=NULL);     /* `interp` is optional (NULL by default).  NOTE(review): presumably runs before CFG analysis -- confirm. */
01434     virtual void analyze_cfg(SgAsmBlock::Reason);               /* NOTE(review): presumably the CFG analysis step itself -- confirm. */
01435     virtual void post_cfg(SgAsmInterpretation *interp=NULL);    /* `interp` is optional (NULL by default).  NOTE(review): presumably runs after CFG analysis -- confirm. */
01436     virtual SgAsmFunction* build_ast(Function*);                /* Returns an SgAsmFunction node for the given Function. */
01437     virtual SgAsmBlock* build_ast(BasicBlock*);                 /* Returns an SgAsmBlock node for the given BasicBlock. */
01438     virtual SgAsmBlock* build_ast(DataBlock*);                  /* Returns an SgAsmBlock node for the given DataBlock. */
01439     virtual bool pops_return_address(rose_addr_t);              /* NOTE(review): semantics not shown here; see the implementation. */
01440     virtual void update_analyses(BasicBlock*);                  /* Makes sure cached analysis results are current. */
01441     virtual rose_addr_t canonic_block(rose_addr_t);             /* Maps an address to an address.  NOTE(review): semantics not shown here. */
01442     virtual bool is_function_call(BasicBlock*, rose_addr_t*);   /* True if basic block appears to call a function. */
01443     virtual bool is_thunk(Function*);                           /* True if function is a thunk. */
01444     virtual Function *effective_function(DataBlock*);           /* Function to which a data block is currently bound. */
01445 
01446     virtual void mark_call_insns();                             /* NOTE(review): the mark_* methods presumably seed functions from different sources -- confirm each in the implementation. */
01447     virtual void mark_ipd_configuration();                      /* NOTE(review): presumably related to the IPD configuration (see IPDParser) -- confirm. */
01448     virtual void mark_entry_targets(SgAsmGenericHeader*);       /* Operates on one file header. */
01449     virtual void mark_eh_frames(SgAsmGenericHeader*);           /* Operates on one file header. */
01450     virtual void mark_elf_plt_entries(SgAsmGenericHeader*);     /* Operates on one file header. */
01451     virtual void mark_func_symbols(SgAsmGenericHeader*);        /* Operates on one file header. */
01452     virtual void mark_func_patterns();                          /* Seeds functions according to instruction patterns */
01453     virtual void name_plt_entries(SgAsmGenericHeader*);         /* Assign names to ELF PLT functions */
01454     virtual void name_import_entries(SgAsmGenericHeader*);      /* Assign names to PE import functions */
01455 
01458     virtual size_t function_extent(FunctionRangeMap *extents);  /* Whole-partitioner overload; NOTE(review): meaning of the size_t result is not shown here. */
01459 
              /* Per-function overload.  `extents` is marked in,out and the two address pointers are marked out; all three
               * are optional and default to NULL. */
01468     virtual size_t function_extent(Function*,
01469                                    FunctionRangeMap *extents=NULL/*in,out*/,
01470                                    rose_addr_t *lo_addr=NULL/*out*/, rose_addr_t *hi_addr=NULL/*out*/);
01471 
              /* Per-data-block counterpart of the overload above, with the same optional in,out and out parameters. */
01477     virtual size_t datablock_extent(DataBlock*,
01478                                     DataRangeMap *extents=NULL/*in,out*/,
01479                                     rose_addr_t *lo_addr=NULL/*out*/, rose_addr_t *hi_addr=NULL/*out*/);
01480 
              /* Overload without a DataBlock argument; `extent` is a required in,out parameter. */
01483     virtual size_t datablock_extent(DataRangeMap *extent/*in,out*/);
01484 
              /* Same shape as the overload above.  NOTE(review): presumably restricted to padding blocks -- confirm. */
01487     virtual size_t padding_extent(DataRangeMap *extent/*in,out*/);
01488 
              /* `strict` defaults to false.  NOTE(review): what the strict mode tightens is not shown in this header. */
01493     virtual bool is_contiguous(Function*, bool strict=false);
01494 
01504     static rose_addr_t get_indirection_addr(SgAsmInstruction*); /* Static helper: instruction in, address out.  NOTE(review): semantics not shown here. */
01505 
              /* Static helper: value expression in, address-typed integer out. */
01507     static rose_addr_t value_of(SgAsmValueExpression*);
01508 
              /* printf-style output routine.  The format attribute uses indices 3 and 4 rather than 2 and 3 because, for a
               * non-static member function, the implicit `this` pointer counts as argument 1. */
01512     void progress(FILE*, const char *fmt, ...) const __attribute__((format(printf, 3, 4)));
01513 
              /* NOTE(review): presumably applies detach_thunk() across functions and returns a count -- confirm. */
01517     virtual size_t detach_thunks();
01518 
              /* Single-function variant; returns a bool.  NOTE(review): presumably true when a thunk was detached. */
01524     virtual bool detach_thunk(Function*);
01525 
              /* NOTE(review): semantics not shown in this header; see the implementation. */
01529     virtual void adjust_padding();
01530 
              /* NOTE(review): semantics not shown in this header; see the implementation. */
01535     virtual void merge_function_fragments();
01536 
              /* Takes two functions named `parent` and `other`.  NOTE(review): presumably merges `other` into `parent`. */
01538     virtual void merge_functions(Function *parent, Function *other);
01539 
              /* Non-virtual.  Returns a set of addresses for basic block `bb`.  `do_create` defaults to true and
               * `table_addresses` is an optional pointer (NULL by default).  NOTE(review): presumably the returned set holds
               * jump-table targets and `table_addresses` receives the table's own extent -- confirm. */
01545     Disassembler::AddressSet discover_jump_table(BasicBlock *bb, bool do_create=true, ExtentMap *table_addresses=NULL);
01546 
01547     /*************************************************************************************************************************
01548      *                                   IPD Parser for initializing the Partitioner
01549      *************************************************************************************************************************/
01550 public:
01551 
01716     class IPDParser {
              /* Parser over an in-memory character buffer, bound to one Partitioner.  The public surface is the
               * constructor, the nested Exception type, parse(), and the static unparse(); the lexical and grammar helpers
               * are private.  NOTE(review): the grammar the parse_* helpers refer to as "rules above" is not present in
               * this listing. */
01717     private:
01718         Partitioner *partitioner;               /* Set from the constructor argument `p`. */
01719         const char *input;                      /* Caller-supplied buffer; this class does not copy or free it here. */
01720         size_t len;                             /* Caller-supplied length that accompanies `input`. */
01721         std::string input_name;                 /* Copy of the optional name argument; empty by default. */
01722         size_t at;                              /* Starts at zero; NOTE(review): presumably the current offset into `input`. */
01723         Function *cur_func;                     /* Starts NULL. */
01724         BlockConfig *cur_block;                 /* Starts NULL. */
01726     public:
              /* Stores the arguments and resets the parse state (at=0, cur_func=NULL, cur_block=NULL). */
01727         IPDParser(Partitioner *p, const char *input, size_t len, const std::string &input_name="")
01728             : partitioner(p), input(input), len(len), input_name(input_name), at(0), cur_func(NULL), cur_block(NULL) {}
01729 
01730         class Exception {                      /* Error type with a message, an optional name, and a line number. */
01731         public:
                  /* Message-only form: `name` stays empty and `lnum` is zero. */
01732             Exception(const std::string &mesg)
01733                 : lnum(0), mesg(mesg) {}
                  /* Full form: `lnum` is optional and defaults to zero. */
01734             Exception(const std::string &mesg, const std::string &name, unsigned lnum=0)
01735                 : name(name), lnum(lnum), mesg(mesg) {}
01736             std::string format() const;         /* Returns a string built from this exception. */
01737             friend std::ostream& operator<<(std::ostream&, const Exception &e);
01738 
01739             std::string name;                   /* NOTE(review): presumably the input name the error refers to. */
01740             unsigned lnum;                      /* NOTE(review): presumably a line number; zero when not supplied. */
01741             std::string mesg;                   /* The message passed to the constructor. */
01742         };
01743 
01744         void parse();                           /* Entry point; takes no arguments and works on the stored buffer. */
01745         static void unparse(std::ostream&, SgNode *ast); /* Static: writes to the stream from an AST node. */
01748         /*************************************************************************************************************************
01749          * Lexical analysis functions.
01750          *************************************************************************************************************************/
01751     private:
01752         void skip_space();
01753 
01754         /* The is_* functions return true if the next token after white space and comments is of the specified type. */
01755         bool is_terminal(const char *to_match);
01756         bool is_symbol(const char *to_match);
01757         bool is_string();
01758         bool is_number();
01759 
01760         /* The match_* functions skip over white space and comments and attempt to match (and consume) the next token. If the next
01761          * token is not as expected then an exception is thrown. */
01762         void match_terminal(const char *to_match);
01763         void match_symbol(const char *to_match);
01764         std::string match_symbol();
01765         std::string match_string();
01766         rose_addr_t match_number();
01767         std::string match_asm();        /* assembly code inside nested curly braces */
01768 
01769 
01770         /*************************************************************************************************************************
01771          * Parsing functions (see rules above). Each returns true if the construct is present and was parsed, false if the
01772          * construct was not present. They throw an exception if the construct was partially present but an error occurred during
01773          * parsing.
01774          *************************************************************************************************************************/
01775     private:
01776         bool parse_File();
01777         bool parse_Declaration();
01778         bool parse_FuncDecl();
01779         bool parse_FuncBody();
01780         bool parse_FuncStmtList();
01781         bool parse_FuncStmt();
01782         bool parse_ReturnSpec();
01783         bool parse_BlockDecl();
01784         bool parse_BlockBody();
01785         bool parse_BlockStmtList();
01786         bool parse_BlockStmt();
01787         bool parse_Alias();
01788         bool parse_Successors();
01789     };
01790 
01791     /*************************************************************************************************************************
01792      *                                                     Data Members
01793      *
01794      * These are public so they can be accessed by user-defined traversal callbacks that might be declared outside any
01795      * Partitioner subclass.
01796      *************************************************************************************************************************/
01797 public:
01798     Disassembler *disassembler;                         /* Pointer; ownership is not shown in this header. */
01799     InstructionMap insns;                               /* Held by value.  NOTE(review): presumably the known instructions keyed by address. */
01800     MemoryMap *map;                                     /* Pointer; ownership is not shown in this header. */
01801     MemoryMap ro_map;                                   /* Held by value.  NOTE(review): name suggests a read-only view of memory -- confirm. */
01802     Disassembler::BadMap bad_insns;                     /* NOTE(review): presumably addresses where disassembly failed -- confirm. */
01804     BasicBlocks basic_blocks;                           /* Container of basic blocks, held by value. */
01805     Functions functions;                                /* Container of functions, held by value. */
01807     DataBlocks data_blocks;                             /* Container of data blocks, held by value. */
01809     unsigned func_heuristics;                           /* NOTE(review): presumably a bit mask selecting function-detection heuristics. */
01810     std::vector<FunctionDetector> user_detectors;       /* User-supplied detectors, stored by value in a vector. */
01812     FILE *debug;                                        /* C stream.  NOTE(review): presumably NULL disables debug output -- confirm. */
01813     bool allow_discont_blocks;                          /* NOTE(review): presumably permits basic blocks with non-adjacent instructions. */
01814     BlockConfigMap block_config;                        /* Held by value.  NOTE(review): presumably filled in from IPD input. */
01816     static time_t progress_interval;                    /* Shared by all Partitioner objects (static). */
01817     static time_t progress_time;                        /* Shared by all Partitioner objects (static). */
01818     static FILE *progress_file;                         /* Shared by all Partitioner objects (static). */
01820 public:
01821     static const rose_addr_t NO_TARGET = (rose_addr_t)-1; /* The value -1 converted to rose_addr_t.  NOTE(review): presumably a "no address" sentinel. */
01822 };
01823 
01824 #endif

Generated on Wed May 16 06:18:11 2012 for ROSE by  doxygen 1.4.7