| # -*- coding: Latin-1 -*- |
| """peutils, Portable Executable utilities module |
| |
| |
| Copyright (c) 2005-2011 Ero Carrera <ero.carrera@gmail.com> |
| |
| All rights reserved. |
| |
| For detailed copyright information see the file COPYING in |
| the root of the distribution archive. |
| """ |
| |
| import os |
| import re |
| import string |
| import urllib |
| import pefile |
| |
| __author__ = 'Ero Carrera' |
| __version__ = pefile.__version__ |
| __contact__ = 'ero.carrera@gmail.com' |
| |
| |
| |
| |
class SignatureDatabase:
    """Load and keep a parsed PEiD signature database.

    Usage:

        sig_db = SignatureDatabase('/path/to/signature/file')

    and/or

        sig_db = SignatureDatabase()
        sig_db.load('/path/to/signature/file')

    Signature databases can be combined by performing multiple loads.

    The filename parameter can be a URL too. In that case the
    signature database will be downloaded from that location.

    Signatures are stored as trees built from nested dictionaries. The
    keys are byte values (ints) or wildcard tokens (strings such as
    '??'); each value is either another dictionary of the same form,
    holding the following bytes of the signature, or a dictionary entry
    whose key is a packer name and whose value is None, which marks the
    end of a complete signature.
    """

    def __init__(self, filename=None, data=None):
        """Create the database, optionally loading signatures right away.

        filename is a path or URL of a signature file; data is the raw
        text of a signature file. data is only used when filename is None.
        """

        # RegExp to match a signature block. The groups are: packer name,
        # signature bytes, trailing (superfluous) wildcards, the ep_only
        # value and the optional section_start_only value.
        #
        self.parse_sig = re.compile(
            r'\[(.*?)\]\s+?signature\s*=\s*(.*?)(\s+\?\?)*\s*'
            r'ep_only\s*=\s*(\w+)'
            r'(?:\s*section_start_only\s*=\s*(\w+)|)', re.S)

        # One signature tree (and signature counter) per kind of signature
        #
        self.signature_tree_eponly_true = dict()
        self.signature_count_eponly_true = 0
        self.signature_tree_eponly_false = dict()
        self.signature_count_eponly_false = 0
        self.signature_tree_section_start = dict()
        self.signature_count_section_start = 0

        # The depth (length) of the longest signature
        #
        self.max_depth = 0

        self.__load(filename=filename, data=data)

    @staticmethod
    def _is_raw_bytes(value):
        """Tell whether value is a byte sequence that iterates as ints.

        That is the case for Python 3 bytes and for bytearray; a Python 2
        str iterates as one-character strings and is reported as False.
        """

        return (isinstance(value, (bytes, bytearray))
                and not isinstance(value, str))

    @staticmethod
    def _byte_values(data):
        """Return the integer value of every byte (or character) in data."""

        if SignatureDatabase._is_raw_bytes(data):
            return list(bytearray(data))
        return [ord(char) for char in data]

    @staticmethod
    def _all_offsets(length):
        """Return a lazy sequence of every offset in [0, length)."""

        try:
            return xrange(length)
        except NameError:
            # Python 3: range is already lazy
            return range(length)

    def generate_section_signatures(self, pe, name, sig_length=512):
        """Generate signatures for all the sections in a PE file.

        A signature is created for every section holding at least
        sig_length bytes of raw data. The signature name is a
        combination of the parameter 'name', the section number and
        the printable characters of the section name.

        Returns the signatures as a single newline separated string.
        """

        section_signatures = []
        section_count = len(pe.sections)

        for idx, section in enumerate(pe.sections):

            if section.SizeOfRawData < sig_length:
                continue

            offset = section.PointerToRawData

            raw_name = section.Name
            if self._is_raw_bytes(raw_name):
                raw_name = [chr(value) for value in bytearray(raw_name)]
            printable_name = ''.join(
                [c for c in raw_name if c in string.printable])

            sig_name = '%s Section(%d/%d,%s)' % (
                name, idx + 1, section_count, printable_name)

            section_signatures.append(
                self.__generate_signature(
                    pe, offset, sig_name, ep_only=False,
                    section_start_only=True,
                    sig_length=sig_length))

        return '\n'.join(section_signatures) + '\n'

    def generate_ep_signature(self, pe, name, sig_length=512):
        """Generate a signature for the entry point of a PE file.

        The signature is named after the parameter 'name' and covers
        up to sig_length bytes starting at the file offset of the
        entry point.
        """

        offset = pe.get_offset_from_rva(pe.OPTIONAL_HEADER.AddressOfEntryPoint)

        return self.__generate_signature(
            pe, offset, name, ep_only=True, sig_length=sig_length)

    def __generate_signature(self, pe, offset, name, ep_only=False,
                             section_start_only=False, sig_length=512):
        """Format the sig_length bytes at offset as a PEiD signature block."""

        data = pe.__data__[offset:offset + sig_length]

        signature_bytes = ' '.join(
            ['%02x' % value for value in self._byte_values(data)])

        ep_flag = 'false'
        if ep_only == True:
            ep_flag = 'true'

        section_flag = 'false'
        if section_start_only == True:
            section_flag = 'true'

        return '[%s]\nsignature = %s\nep_only = %s\nsection_start_only = %s\n' % (
            name, signature_bytes, ep_flag, section_flag)

    def match(self, pe, ep_only=True, section_start_only=False):
        """Match and return the exact match(es).

        If ep_only is True the result is the list of packer names of
        the most precise signature matching at the scanned address.
        Otherwise it is a list of (file_offset, packer_names) tuples
        specifying where in the file each signature was found.

        Returns None when nothing matches.
        """

        matches = self.__match(pe, ep_only, section_start_only)

        if not matches:
            return None

        # The last match (the most precise) from each list of matches
        # is the one returned
        #
        if ep_only == False:
            return [(offset, names[-1]) for offset, names in matches]

        return matches[1][-1]

    def match_all(self, pe, ep_only=True, section_start_only=False):
        """Match and return all the likely matches.

        Same as match() but every matching signature is reported, not
        only the most precise one. Returns None when nothing matches.
        """

        matches = self.__match(pe, ep_only, section_start_only)

        if not matches:
            return None

        if ep_only == False:
            # All the (offset, matches) pairs are returned untouched
            #
            return matches

        return matches[1]

    def __scan(self, signatures, data, scan_addresses):
        """Walk the signature tree at each address; collect non-empty hits.

        Returns a list of (address, matched_names) tuples.
        """

        matches = []
        for idx in scan_addresses:
            result = self.__match_signature_tree(
                signatures,
                data[idx:idx + self.max_depth])
            if result:
                matches.append((idx, result))
        return matches

    def __match(self, pe, ep_only, section_start_only):
        """Select tree, data and scan addresses for a PE file and scan it."""

        if section_start_only is True:

            # Raw file data, scanned at the start of every section
            #
            data = pe.__data__
            signatures = self.signature_tree_section_start
            scan_addresses = [
                section.PointerToRawData for section in pe.sections]

        elif ep_only is True:

            # The executable as it'd look once loaded in memory,
            # scanned only at the entry point
            #
            data = pe.get_memory_mapped_image()
            signatures = self.signature_tree_eponly_true
            scan_addresses = [pe.OPTIONAL_HEADER.AddressOfEntryPoint]

        else:

            # Raw file data, scanned at every offset
            #
            data = pe.__data__
            signatures = self.signature_tree_eponly_false
            scan_addresses = self._all_offsets(len(data))

        matches = self.__scan(signatures, data, scan_addresses)

        # Return only the first matched item if ep_only is True
        #
        if ep_only is True:
            if matches:
                return matches[0]

        return matches

    def match_data(self, code_data, ep_only=True, section_start_only=False):
        """Match signatures against a raw chunk of data.

        With section_start_only or ep_only set, only the beginning of
        code_data is checked, against the corresponding signature tree.
        With both unset, the ep_only=false signatures are searched for
        at every offset of code_data.

        If ep_only is True the result is a single (offset, matches)
        tuple, or an empty list if nothing matched. Otherwise it is a
        list of such tuples.
        """

        data = code_data

        if section_start_only is True:
            signatures = self.signature_tree_section_start
            scan_addresses = [0]

        elif ep_only is True:
            signatures = self.signature_tree_eponly_true
            scan_addresses = [0]

        else:
            # Signatures not bound to the entry point may show up anywhere
            #
            signatures = self.signature_tree_eponly_false
            scan_addresses = self._all_offsets(len(data))

        matches = self.__scan(signatures, data, scan_addresses)

        # Return only the first matched item if ep_only is True
        #
        if ep_only is True:
            if matches:
                return matches[0]

        return matches

    def __match_signature_tree(self, signature_tree, data, depth=0):
        """Recursive function to find matches along the signature tree.

        signature_tree  is the part of the tree left to walk
        data    is the data being checked against the signature tree
        depth   keeps track of how far we have gone down the tree

        Returns a list of lists of packer names, one inner list for
        each point of the walk where at least one signature ended.
        """

        matched_names = []
        match = signature_tree

        # Walk the bytes in the data and match them
        # against the signature
        #
        for idx, byte in enumerate(self._byte_values(data)):

            # If the tree is exhausted...
            #
            if match is None:
                break

            # Get the next byte in the tree
            #
            match_next = match.get(byte, None)

            # Keys whose value is None are packer names: a signature
            # in the database ends here and there's an exact match.
            #
            names = [key for key, value in match.items() if value is None]
            if names:
                matched_names.append(names)

            # If a wildcard is found keep scanning the signature
            # ignoring the byte.
            #
            if '??' in match:
                data_remaining = data[idx + 1:]
                if data_remaining:
                    matched_names.extend(
                        self.__match_signature_tree(
                            match['??'], data_remaining, idx + depth + 1))

            match = match_next

        # If the data ran out exactly where signatures end, add
        # their packer names to the matches
        #
        if match is not None:
            names = [key for key, value in match.items() if value is None]
            if names:
                matched_names.append(names)

        return matched_names

    def load(self, filename=None, data=None):
        """Load a PEiD signature file.

        Invoking this method on different files combines the signatures.
        """

        self.__load(filename=filename, data=data)

    def __load(self, filename=None, data=None):
        """Parse a signature file (path, URL or raw text) into the trees.

        IOError raised while opening or reading is passed on to the caller.
        """

        if filename is not None:
            # If the path does not exist, attempt to open a URL
            #
            if not os.path.exists(filename):
                try:
                    from urllib.request import urlopen
                except ImportError:
                    # Python 2
                    from urllib import urlopen
                sig_f = urlopen(filename)
            else:
                sig_f = open(filename, 'rt')

            # Make sure the handle is released even if reading fails
            #
            try:
                sig_data = sig_f.read()
            finally:
                sig_f.close()
        else:
            sig_data = data

        # If the file/URL could not be read or no "raw" data
        # was provided there's nothing else to do
        #
        if not sig_data:
            return

        # The parsing regexp is a text pattern; downloaded data
        # may come as bytes
        #
        if self._is_raw_bytes(sig_data):
            sig_data = bytes(sig_data).decode('latin-1')

        # Helper function to parse the signature bytes. Tokens holding
        # a wildcard ('??', '?0', 'A?', ...) are kept as strings.
        #
        def to_byte(value):
            if '?' in value:
                return value
            return int(value, 16)

        # For each signature, get the details and load it into the
        # signature tree
        #
        for (packer_name, signature, superfluous_wildcards,
             ep_only, section_start_only) in self.parse_sig.findall(sig_data):

            ep_only = ep_only.strip().lower() == 'true'
            section_start_only = section_start_only.strip().lower() == 'true'

            signature = signature.replace('\\n', '').strip()
            signature_bytes = [to_byte(b) for b in signature.split()]

            if section_start_only:
                tree = self.signature_tree_section_start
                self.signature_count_section_start += 1
            elif ep_only:
                tree = self.signature_tree_eponly_true
                self.signature_count_eponly_true += 1
            else:
                tree = self.signature_tree_eponly_false
                self.signature_count_eponly_false += 1

            # Descend (creating nodes as needed) one level per byte and
            # tag the last node with the packer name
            #
            last_idx = len(signature_bytes) - 1
            for idx, byte in enumerate(signature_bytes):
                tree = tree.setdefault(byte, dict())
                if idx == last_idx:
                    tree[packer_name] = None

            depth = len(signature_bytes)
            if depth > self.max_depth:
                self.max_depth = depth
| |