tools/code_coverage/croc_scan.py - chromium/src - Git at Google

 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Crocodile source scanners."""


 import re


 class Scanner(object):
   """Generic source scanner.

   Splits each line into tokens with `re_token` and uses them to remove
   comments, so that a line holding only comments or whitespace is not
   reported as executable.  Subclasses customise the behaviour by replacing
   the attributes set in the constructor.

   Attributes:
     re_token: Compiled regular expression matching every token of interest
         (comment markers and string delimiters).
     comment_to_eol: List of tokens which start a comment running to the end
         of the line.
     comment_start: Token which opens a multi-line comment, or None.
     comment_end: Token which closes a multi-line comment, or None.
   """

   def __init__(self):
     """Constructor."""

     self.re_token = re.compile('#')
     self.comment_to_eol = ['#']
     self.comment_start = None
     self.comment_end = None

   def ScanLines(self, lines):
     """Scans the lines for executable statements.

     A line is reported when something other than whitespace is left after
     its comments are removed, or when it starts inside a string which was
     opened on an earlier line.

     Args:
       lines: Iterator returning source lines.

     Returns:
       An array of line numbers (counted from 1) which are executable.
     """
     exe_lines = []

     in_string = None      # Token which closes the currently open string
     in_comment = None     # Token which closes the open multi-line comment
     comment_index = None  # Column where the open comment starts on this line

     for lineno, line in enumerate(lines, 1):
       in_string_at_start = in_string

       # finditer() keeps walking the original text of the line.  The edits
       # made to `line` below either preserve its length or end the walk, so
       # the match offsets stay valid.
       for t in self.re_token.finditer(line):
         # Use the whole match rather than groups()[0], so that a pattern
         # without a capturing group (such as the default '#') does not raise
         # IndexError.  Strip whitespace the pattern may have matched around
         # the token (for example via '^\s*#'), so that the token compares
         # equal to the entries in comment_to_eol.
         tokenstr = t.group(0).strip()

         if in_comment:
           # Inside a multi-line comment, so look for end token
           if tokenstr == in_comment:
             in_comment = None
             # Replace comment with spaces
             line = (line[:comment_index]
                     + ' ' * (t.end(0) - comment_index)
                     + line[t.end(0):])

         elif in_string:
           # Inside a string, so look for end token
           if tokenstr == in_string:
             in_string = None

         elif tokenstr in self.comment_to_eol:
           # Single-line comment, so truncate line at start of token
           line = line[:t.start(0)]
           break

         elif tokenstr == self.comment_start:
           # Multi-line comment start - end token is comment_end
           in_comment = self.comment_end
           comment_index = t.start(0)

         else:
           # Starting a string - end token is same as start
           in_string = tokenstr

       # If still in comment at end of line, remove comment
       if in_comment:
         line = line[:comment_index]
         # Next line, delete from the beginning
         comment_index = 0

       # If line-sans-comments is not empty, claim it may be executable
       if line.strip() or in_string_at_start:
         exe_lines.append(lineno)

     # Return executable lines
     return exe_lines

   def Scan(self, filename):
     """Reads the file and scans its lines.

     Errors raised while opening or reading the file are not caught here.

     Args:
       filename: Path to file to scan.

     Returns:
       An array of line numbers which are executable.
     """

     # TODO: All manner of error checking
     with open(filename, 'rt') as f:
       return self.ScanLines(f)


 class PythonScanner(Scanner):
   """Scanner configured for Python source files."""

   def __init__(self):
     """Sets the Python token pattern and comment markers."""
     super(PythonScanner, self).__init__()

     # Only '#' comments running to end of line; no multi-line comment tokens.
     self.comment_start = None
     self.comment_end = None
     self.comment_to_eol = ['#']

     # Tokens are '#', either kind of triple quote, or a single quote
     # character which is not preceded by a backslash that itself follows a
     # non-backslash.
     # TODO: This breaks for strings ending in more than 2 backslashes.  Need
     # a pattern which counts only an odd number of backslashes, so the last
     # one thus escapes the quote.
     self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')


 class CppScanner(Scanner):
   """Scanner configured for C, C++, ObjC and ObjC++ source files."""

   def __init__(self):
     """Sets the C-family token pattern and comment markers."""
     super(CppScanner, self).__init__()

     # Multi-line comments run from '/*' to '*/'
     self.comment_start = '/*'
     self.comment_end = '*/'

     # '//' comments run to end of line.  A '#' at the beginning of a line is
     # treated the same way, so that preprocessor definitions are ignored.
     self.comment_to_eol = ['//', '#']

     # Tokens are '#' at the start of a line (optionally after whitespace),
     # the comment markers, and quote characters.
     # NOTE: the text matched by the '^\s*#' alternative includes any leading
     # whitespace, so it equals the '#' entry above only when the directive
     # starts in the first column or the scan loop normalises the token.
     # TODO: This breaks for strings ending in more than 2 backslashes.  Need
     # a pattern which counts only an odd number of backslashes, so the last
     # one thus escapes the quote.
     # TODO: Treat '\' at EOL as a token, and handle it as continuing the
     # previous line.  That is, if in a comment-to-eol, this line is a comment
     # too.
     self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')


 def ScanFile(filename, language):
   """Finds the executable lines of a source file.

   Picks the scanner which matches the language and runs it over the file.

   Args:
     filename: Path to file to scan.
     language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

   Returns:
     A list of executable lines, or an empty list if the file was not a handled
         language.
   """
   if language == 'python':
     scanner = PythonScanner()
   elif language in ('C', 'C++', 'ObjC', 'ObjC++'):
     scanner = CppScanner()
   else:
     # Not a language we know how to scan
     return []

   return scanner.Scan(filename)
	# Copyright (c) 2011 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Crocodile source scanners."""


	import re


	class Scanner(object):
	  """Generic source scanner.

	  Splits each line into tokens with `re_token` and uses them to remove
	  comments, so that a line holding only comments or whitespace is not
	  reported as executable.  Subclasses customise the behaviour by replacing
	  the attributes set in the constructor.

	  Attributes:
	    re_token: Compiled regular expression matching every token of interest
	        (comment markers and string delimiters).
	    comment_to_eol: List of tokens which start a comment running to the end
	        of the line.
	    comment_start: Token which opens a multi-line comment, or None.
	    comment_end: Token which closes a multi-line comment, or None.
	  """

	  def __init__(self):
	    """Constructor."""

	    self.re_token = re.compile('#')
	    self.comment_to_eol = ['#']
	    self.comment_start = None
	    self.comment_end = None

	  def ScanLines(self, lines):
	    """Scans the lines for executable statements.

	    A line is reported when something other than whitespace is left after
	    its comments are removed, or when it starts inside a string which was
	    opened on an earlier line.

	    Args:
	      lines: Iterator returning source lines.

	    Returns:
	      An array of line numbers (counted from 1) which are executable.
	    """
	    exe_lines = []

	    in_string = None      # Token which closes the currently open string
	    in_comment = None     # Token which closes the open multi-line comment
	    comment_index = None  # Column where the open comment starts on this line

	    for lineno, line in enumerate(lines, 1):
	      in_string_at_start = in_string

	      # finditer() keeps walking the original text of the line.  The edits
	      # made to `line` below either preserve its length or end the walk, so
	      # the match offsets stay valid.
	      for t in self.re_token.finditer(line):
	        # Use the whole match rather than groups()[0], so that a pattern
	        # without a capturing group (such as the default '#') does not raise
	        # IndexError.  Strip whitespace the pattern may have matched around
	        # the token (for example via '^\s*#'), so that the token compares
	        # equal to the entries in comment_to_eol.
	        tokenstr = t.group(0).strip()

	        if in_comment:
	          # Inside a multi-line comment, so look for end token
	          if tokenstr == in_comment:
	            in_comment = None
	            # Replace comment with spaces
	            line = (line[:comment_index]
	                    + ' ' * (t.end(0) - comment_index)
	                    + line[t.end(0):])

	        elif in_string:
	          # Inside a string, so look for end token
	          if tokenstr == in_string:
	            in_string = None

	        elif tokenstr in self.comment_to_eol:
	          # Single-line comment, so truncate line at start of token
	          line = line[:t.start(0)]
	          break

	        elif tokenstr == self.comment_start:
	          # Multi-line comment start - end token is comment_end
	          in_comment = self.comment_end
	          comment_index = t.start(0)

	        else:
	          # Starting a string - end token is same as start
	          in_string = tokenstr

	      # If still in comment at end of line, remove comment
	      if in_comment:
	        line = line[:comment_index]
	        # Next line, delete from the beginning
	        comment_index = 0

	      # If line-sans-comments is not empty, claim it may be executable
	      if line.strip() or in_string_at_start:
	        exe_lines.append(lineno)

	    # Return executable lines
	    return exe_lines

	  def Scan(self, filename):
	    """Reads the file and scans its lines.

	    Errors raised while opening or reading the file are not caught here.

	    Args:
	      filename: Path to file to scan.

	    Returns:
	      An array of line numbers which are executable.
	    """

	    # TODO: All manner of error checking
	    with open(filename, 'rt') as f:
	      return self.ScanLines(f)


	class PythonScanner(Scanner):
	  """Python source scanner."""

	  def __init__(self):
	    """Sets the Python token pattern and comment markers."""
	    Scanner.__init__(self)

	    # Tokens are '#', either kind of triple quote, or a single quote
	    # character which is not preceded by a backslash that itself follows a
	    # non-backslash.  The alternatives must be separated by an unescaped
	    # '|'; an escaped '\|' would match a literal pipe character instead.
	    # TODO: This breaks for strings ending in more than 2 backslashes.  Need
	    # a pattern which counts only an odd number of backslashes, so the last
	    # one thus escapes the quote.
	    self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')

	    # Only '#' comments running to end of line; no multi-line comment tokens.
	    self.comment_to_eol = ['#']
	    self.comment_start = None
	    self.comment_end = None


	class CppScanner(Scanner):
	  """C / C++ / ObjC / ObjC++ source scanner."""

	  def __init__(self):
	    """Sets the C-family token pattern and comment markers."""
	    Scanner.__init__(self)

	    # Tokens are '#' at the start of a line (optionally after whitespace),
	    # '//', '/*', '*/', and quote characters.  The alternatives must be
	    # separated by an unescaped '|', and the '*' of '/*' must be escaped so
	    # it is matched literally.
	    # TODO: This breaks for strings ending in more than 2 backslashes.  Need
	    # a pattern which counts only an odd number of backslashes, so the last
	    # one thus escapes the quote.
	    self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')

	    # TODO: Treat '\' at EOL as a token, and handle it as continuing the
	    # previous line.  That is, if in a comment-to-eol, this line is a comment
	    # too.

	    # Note that we treat # at beginning of line as a comment, so that we
	    # ignore preprocessor definitions
	    self.comment_to_eol = ['//', '#']

	    # Multi-line comments run from '/*' to '*/'
	    self.comment_start = '/*'
	    self.comment_end = '*/'


	def ScanFile(filename, language):
	  """Scans a file for executable lines.

	  Picks the scanner which matches the language and runs it over the file.

	  Args:
	    filename: Path to file to scan.
	    language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

	  Returns:
	    A list of executable lines, or an empty list if the file was not a
	        handled language.
	  """

	  if language == 'python':
	    return PythonScanner().Scan(filename)
	  elif language in ['C', 'C++', 'ObjC', 'ObjC++']:
	    return CppScanner().Scan(filename)

	  # Something we don't handle
	  return []