-Improved parsing of files with thousands of sections. Sections that appear to be invalid are now ignored, and some of the checks have been optimized for cases where a file still has many possibly valid sections. git-svn-id: http://pefile.googlecode.com/svn/trunk@125 8842bc4e-7134-0410-8230-5dc5194fb5c1

commit: fd6e9851ca16a0ae67f5424c10e93225deac84b5 [log] [tgz]
author: ero.carrera@gmail.com <ero.carrera@gmail.com@8842bc4e-7134-0410-8230-5dc5194fb5c1> Sun Dec 16 12:34:04 2012
committer: ero.carrera@gmail.com <ero.carrera@gmail.com@8842bc4e-7134-0410-8230-5dc5194fb5c1> Sun Dec 16 12:34:04 2012
tree: 901aac4dcfa7db7fed2a05e4f74299ae58a38968
parent: 6fbf45c72aed00c5833d088749febd3706ef8212 [diff]
diff --git a/pefile.py b/pefile.py
index 1b9b172..219c91a 100644
--- a/pefile.py
+++ b/pefile.py

@@ -1053,12 +1053,13 @@
         VirtualAddress_adj = self.pe.adjust_SectionAlignment( self.VirtualAddress,
             self.pe.OPTIONAL_HEADER.SectionAlignment, self.pe.OPTIONAL_HEADER.FileAlignment )
 
-        # Check if there's any further section that will start before the
-        # calculated end for the current one, if so cut the current's size
-        # to fit in the range until the next one starts.
-        for s in self.pe.sections:
-            if s.VirtualAddress > self.VirtualAddress and VirtualAddress_adj + size > s.VirtualAddress:
-                    size = s.VirtualAddress - VirtualAddress_adj
+        # Check whether there's any section after the current one that starts before the
+        # calculated end for the current one, if so, cut the current section's size
+        # to fit in the range up to where the next section starts.
+        if (self.next_section_virtual_address is not None and
+            self.next_section_virtual_address > self.VirtualAddress and
+            VirtualAddress_adj + size > self.next_section_virtual_address):
+                size = self.next_section_virtual_address - VirtualAddress_adj
 
         return VirtualAddress_adj <= rva < VirtualAddress_adj + size
 
@@ -1068,12 +1069,6 @@
         return self.contains_rva(rva)
 
 
-    #def set_data(self, data):
-    #    """Set the data belonging to the section."""
-    #
-    #    self.data = data
-
-
     def get_entropy(self):
         """Calculate and return the entropy for the section."""
 
@@ -1129,12 +1124,13 @@
 
 
 
-class DataContainer:
+class DataContainer(object):
     """Generic data container."""
 
     def __init__(self, **args):
+        bare_setattr = super(DataContainer, self).__setattr__
         for key, value in args.items():
-            setattr(self, key, value)
+            bare_setattr(key, value)
 
 
 
@@ -2203,11 +2199,17 @@
                 self.__warnings.append(
                     ('Error parsing section %d. ' % i) +
                     'PointerToRawData points beyond the end of the file.')
+                # Skip these. If we can't get to the raw data it will likely not be a
+                # real section.
+                continue
 
             if section.Misc_VirtualSize > 0x10000000:
                 self.__warnings.append(
                     ('Suspicious value found parsing section %d. ' % i) +
                     'VirtualSize is extremely large > 256MiB.')
+                # Skip these. It will likely not be a real section.
+                continue
+
 
             if self.adjust_SectionAlignment( section.VirtualAddress,
                 self.OPTIONAL_HEADER.SectionAlignment, self.OPTIONAL_HEADER.FileAlignment ) > 0x10000000:
@@ -2254,8 +2256,19 @@
                         'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set. ' +
                         'This might indicate a packed executable.')
 
+
             self.sections.append(section)
 
+        # Sort the sections by their VirtualAddress and add a field to each of them
+        # with the VirtualAddress of the next section. This makes it possible to check
+        # for potentially overlapping sections in badly constructed PEs.
+        self.sections.sort(cmp=lambda a,b: cmp(a.VirtualAddress, b.VirtualAddress))
+        for idx, section in enumerate(self.sections):
+            if idx == len(self.sections)-1:
+                section.next_section_virtual_address = None
+            else:
+                section.next_section_virtual_address = self.sections[idx+1].VirtualAddress
+
         if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
             return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
         else:
@@ -3754,7 +3767,7 @@
         ImageBase+offset.
 
         The optional argument 'max_virtual_address' provides with means of limiting
-        which section are processed.
+        which sections are processed.
         Any section with their VirtualAddress beyond this value will be skipped.
         Normally, sections with values beyond this range are just there to confuse
         tools. It's a common trick to see in packed executables.
commit	fd6e9851ca16a0ae67f5424c10e93225deac84b5	[log] [tgz]
author	ero.carrera@gmail.com <ero.carrera@gmail.com@8842bc4e-7134-0410-8230-5dc5194fb5c1>	Sun Dec 16 12:34:04 2012
committer	ero.carrera@gmail.com <ero.carrera@gmail.com@8842bc4e-7134-0410-8230-5dc5194fb5c1>	Sun Dec 16 12:34:04 2012
tree	901aac4dcfa7db7fed2a05e4f74299ae58a38968
parent	6fbf45c72aed00c5833d088749febd3706ef8212 [diff]