native_client_sdk/src/doc/doxygen/doxy_cleanup.py - chromium/src.git - Git at Google

 #!/usr/bin/env python
 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 '''This utility cleans up the html files as emitted by doxygen so
 that they are suitable for publication on a Google documentation site.
 '''

 import argparse
 import glob
 import os
 import re
 import shutil
 import sys

 try:
   from BeautifulSoup import BeautifulSoup, Tag
 except (ImportError, NotImplementedError):
   print ("This tool requires the BeautifulSoup package "
          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
          "Make sure that the file BeautifulSoup.py is either in this directory "
          "or is available in your PYTHON_PATH")
   raise


 def Trace(msg):
   '''Writes str(msg) followed by a newline to stderr in verbose mode.

   Nothing is written while Trace.verbose is false.
   '''
   if not Trace.verbose:
     return
   sys.stderr.write('%s\n' % str(msg))

 # Tracing starts out disabled; set Trace.verbose to True to enable it.
 Trace.verbose = False


 # Glob patterns for the doxygen-generated files that are deleted instead of
 # being published. The patterns are tried in the order listed.
 FILES_TO_REMOVE = [
   '*.css', '*.map', '*.md5',
   'annotated.html', 'bc_s.png', 'classes.html', 'closed.png',
   'doxygen.png', 'files.html', 'functions*.html', 'globals_0x*.html',
   'globals_enum.html', 'globals_eval.html', 'globals_func.html',
   'globals.html', 'globals_type.html', 'globals_vars.html',
   'graph_legend.html', 'graph_legend.png', 'hierarchy.html',
   'index_8dox.html', 'index.html', 'modules.html',
   'namespacemembers_func.html', 'namespacemembers.html', 'namespaces.html',
   'nav_f.png', 'nav_h.png', 'open.png',
   'tab_a.png', 'tab_b.png', 'tab_h.png', 'tab_s.png',
 ]


 class HTMLFixer(object):
   '''Cleans up the html of one page as produced by Doxygen.

   The html is parsed once by the constructor. FixTableHeadings() and
   RemoveTopHeadings() edit the parsed tree in place, RemoveVersionNumbers()
   post-processes an html string, and FixAll() runs all three and returns the
   resulting html.
   '''

   # Matches the class attribute of the <div> sections that are removed by
   # RemoveTopHeadings().
   _TOP_HEADING_CLASS_RE = re.compile(r'^(header|tabs[0-9]*|navpath)$')

   def __init__(self, html):
     '''Parses |html|, the contents of one html file, with BeautifulSoup.'''
     self.soup = BeautifulSoup(html)

   def FixTableHeadings(self):
     '''Fixes the doxygen table headings.

     This includes:
       - Using bare <h2> title row instead of row embedded in <tr><td> in table
       - Splitting up tables into multiple separate tables if a table
         heading appears in the middle of a table.

     For example, this html:
      <table>
       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
       Data Fields List</h2></td></tr>
       ...
      </table>

     would be converted to this:
      <h2>Data Fields List</h2>
      <table>
       ...
      </table>
     '''

     table_headers = []
     for tag in self.soup.findAll('tr'):
       # Tag.get() yields None for an anchor without a "name" attribute,
       # whereas indexing it would raise KeyError and abort the whole file.
       if (tag.td and tag.td.h2 and tag.td.h2.a and
           tag.td.h2.a.get('name')):
         # NOTE: copying the anchor name into an id attribute is disabled.
         #tag['id'] = tag.td.h2.a['name']
         # Turn the <tr> itself into the heading; its content becomes the
         # node that follows the anchor (the heading text in the example).
         tag.string = tag.td.h2.a.next
         tag.name = 'h2'
         table_headers.append(tag)

     # reverse the list so that earlier tags don't delete later tags
     table_headers.reverse()
     # Split up tables that have multiple table header (th) rows
     for tag in table_headers:
       Trace("Header tag: %s is %s" % (tag.name, tag.string.strip()))
       # Is this a heading in the middle of a table?
       if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
         Trace("Splitting Table named %s" % tag.string.strip())
         table = tag.parent
         table_parent = table.parent
         table_index = table_parent.contents.index(table)
         new_table = Tag(self.soup, name='table', attrs=table.attrs)
         table_parent.insert(table_index + 1, new_table)
         tag_index = table.contents.index(tag)
         # The slice is a copy, so the loop is unaffected if insert()
         # detaches the moved rows from table.contents.
         for index, row in enumerate(table.contents[tag_index:]):
           new_table.insert(index, row)
       # Now move the <h2> tag to be in front of the <table> tag
       assert tag.parent.name == 'table'
       table = tag.parent
       table_parent = table.parent
       table_index = table_parent.contents.index(table)
       table_parent.insert(table_index, tag)

   def RemoveTopHeadings(self):
     '''Removes <div> sections with a header, tabs, or navpath class attribute'''
     header_tags = self.soup.findAll(
         name='div',
         attrs={'class' : self._TOP_HEADING_CLASS_RE})
     for tag in header_tags:
       tag.extract()

   def RemoveVersionNumbers(self, html):
     '''Horrible hack to strip _#_# from struct names.

     Removes every "_<digit>_<digit>" that is directly followed by a double
     quote, a colon, a space or a "<" and returns the resulting string.
     '''
     return re.sub(r'(_\d_\d)(?=[": <])', '', html)

   def FixAll(self):
     '''Applies every fix and returns the cleaned-up html as a string.'''
     self.FixTableHeadings()
     self.RemoveTopHeadings()
     html = str(self.soup)
     html = self.RemoveVersionNumbers(html)
     return html


 def main(args):
   """Entry point of the doxy_cleanup utility.

   Moves everything out of the doxygen 'html' directory into its parent,
   deletes the files matched by FILES_TO_REMOVE and rewrites each remaining
   .html file with the output of HTMLFixer.

   Args:
     args: the command line arguments, without the program name.

   Returns:
     0 once all files have been processed.
   """

   arg_parser = argparse.ArgumentParser(description=__doc__)
   arg_parser.add_argument('-v', '--verbose', action='store_true',
                           help='verbose output.')
   arg_parser.add_argument('directory')
   parsed = arg_parser.parse_args(args)

   if parsed.verbose:
     Trace.verbose = True

   root_dir = parsed.directory
   html_dir = os.path.join(root_dir, 'html')

   # Doxygen writes its output below 'html': hoist every entry into root_dir
   # and then drop the directory itself.
   for entry in glob.glob(os.path.join(html_dir, '*')):
     Trace('Moving %s -> %s' % (entry, root_dir))
     shutil.move(entry, root_dir)

   Trace('Removing %s' % html_dir)
   os.rmdir(html_dir)

   # Delete the generated files that are not wanted.
   for pattern in FILES_TO_REMOVE:
     Trace('Removing "%s":' % pattern)
     for match in glob.glob(os.path.join(root_dir, pattern)):
       Trace('  Removing "%s"' % match)
       os.remove(match)

   # Rewrite, in place, every HTML file that is left.
   Trace('Fixing HTML files...')
   for dirpath, _, basenames in os.walk(root_dir):
     for basename in basenames:
       extension = os.path.splitext(basename)[1]
       if extension != '.html':
         Trace('Skipping %s' % basename)
         continue

       html_path = os.path.join(dirpath, basename)
       Trace('Processing "%s"...' % html_path)
       try:
         with open(html_path) as infile:
           original = infile.read()

         cleaned = HTMLFixer(original).FixAll()
         with open(html_path, 'w') as outfile:
           outfile.write(cleaned)
       except:
         # Name the offending file, then let the error propagate unchanged.
         sys.stderr.write("Error while processing %s\n" % html_path)
         raise

   return 0

 if __name__ == '__main__':
   # Exit with main()'s return value; an interrupt is reported on stderr and
   # turned into exit status 1.
   try:
     sys.exit(main(sys.argv[1:]))
   except KeyboardInterrupt:
     sys.stderr.write('%s: interrupted\n' % os.path.basename(__file__))
     sys.exit(1)
	#!/usr/bin/env python
	# Copyright (c) 2011 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	'''This utility cleans up the html files as emitted by doxygen so
	that they are suitable for publication on a Google documentation site.
	'''

	import argparse
	import glob
	import os
	import re
	import shutil
	import sys

	try:
	from BeautifulSoup import BeautifulSoup, Tag
	except (ImportError, NotImplementedError):
	print ("This tool requires the BeautifulSoup package "
	"(see http://www.crummy.com/software/BeautifulSoup/).\n"
	"Make sure that the file BeautifulSoup.py is either in this directory "
	"or is available in your PYTHON_PATH")
	raise


	def Trace(msg):
	  '''Writes str(msg) and a newline to stderr if Trace.verbose is true.

	  Nothing is written while Trace.verbose is false.
	  '''
	  if Trace.verbose:
	    sys.stderr.write(str(msg) + '\n')

	# Tracing starts out disabled; set Trace.verbose to True to enable it.
	Trace.verbose = False


	# Glob patterns for the doxygen-generated files that are deleted instead of
	# being published. The patterns are tried in the order listed.
	FILES_TO_REMOVE = [
	  '*.css', '*.map', '*.md5',
	  'annotated.html', 'bc_s.png', 'classes.html', 'closed.png',
	  'doxygen.png', 'files.html', 'functions*.html', 'globals_0x*.html',
	  'globals_enum.html', 'globals_eval.html', 'globals_func.html',
	  'globals.html', 'globals_type.html', 'globals_vars.html',
	  'graph_legend.html', 'graph_legend.png', 'hierarchy.html',
	  'index_8dox.html', 'index.html', 'modules.html',
	  'namespacemembers_func.html', 'namespacemembers.html', 'namespaces.html',
	  'nav_f.png', 'nav_h.png', 'open.png',
	  'tab_a.png', 'tab_b.png', 'tab_h.png', 'tab_s.png',
	]


	class HTMLFixer(object):
	  '''Cleans up the html of one page as produced by Doxygen.

	  The html is parsed once by the constructor. FixTableHeadings() and
	  RemoveTopHeadings() edit the parsed tree in place, RemoveVersionNumbers()
	  post-processes an html string, and FixAll() runs all three and returns the
	  resulting html.
	  '''

	  # Matches the class attribute of the <div> sections that are removed by
	  # RemoveTopHeadings(). The bars are regex alternation and must not be
	  # escaped: "\|" would only ever match a literal pipe character.
	  _TOP_HEADING_CLASS_RE = re.compile(r'^(header|tabs[0-9]*|navpath)$')

	  def __init__(self, html):
	    '''Parses |html|, the contents of one html file, with BeautifulSoup.'''
	    self.soup = BeautifulSoup(html)

	  def FixTableHeadings(self):
	    '''Fixes the doxygen table headings.

	    This includes:
	      - Using bare <h2> title row instead of row embedded in <tr><td> in table
	      - Splitting up tables into multiple separate tables if a table
	        heading appears in the middle of a table.

	    For example, this html:
	     <table>
	      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
	      Data Fields List</h2></td></tr>
	      ...
	     </table>

	    would be converted to this:
	     <h2>Data Fields List</h2>
	     <table>
	      ...
	     </table>
	    '''

	    table_headers = []
	    for tag in self.soup.findAll('tr'):
	      # Tag.get() yields None for an anchor without a "name" attribute,
	      # whereas indexing it would raise KeyError and abort the whole file.
	      if (tag.td and tag.td.h2 and tag.td.h2.a and
	          tag.td.h2.a.get('name')):
	        # NOTE: copying the anchor name into an id attribute is disabled.
	        #tag['id'] = tag.td.h2.a['name']
	        # Turn the <tr> itself into the heading; its content becomes the
	        # node that follows the anchor (the heading text in the example).
	        tag.string = tag.td.h2.a.next
	        tag.name = 'h2'
	        table_headers.append(tag)

	    # reverse the list so that earlier tags don't delete later tags
	    table_headers.reverse()
	    # Split up tables that have multiple table header (th) rows
	    for tag in table_headers:
	      Trace("Header tag: %s is %s" % (tag.name, tag.string.strip()))
	      # Is this a heading in the middle of a table?
	      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
	        Trace("Splitting Table named %s" % tag.string.strip())
	        table = tag.parent
	        table_parent = table.parent
	        table_index = table_parent.contents.index(table)
	        new_table = Tag(self.soup, name='table', attrs=table.attrs)
	        table_parent.insert(table_index + 1, new_table)
	        tag_index = table.contents.index(tag)
	        # The slice is a copy, so the loop is unaffected if insert()
	        # detaches the moved rows from table.contents.
	        for index, row in enumerate(table.contents[tag_index:]):
	          new_table.insert(index, row)
	      # Now move the <h2> tag to be in front of the <table> tag
	      assert tag.parent.name == 'table'
	      table = tag.parent
	      table_parent = table.parent
	      table_index = table_parent.contents.index(table)
	      table_parent.insert(table_index, tag)

	  def RemoveTopHeadings(self):
	    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
	    header_tags = self.soup.findAll(
	        name='div',
	        attrs={'class' : self._TOP_HEADING_CLASS_RE})
	    for tag in header_tags:
	      tag.extract()

	  def RemoveVersionNumbers(self, html):
	    '''Horrible hack to strip _#_# from struct names.

	    Removes every "_<digit>_<digit>" that is directly followed by a double
	    quote, a colon, a space or a "<" and returns the resulting string.
	    '''
	    return re.sub(r'(_\d_\d)(?=[": <])', '', html)

	  def FixAll(self):
	    '''Applies every fix and returns the cleaned-up html as a string.'''
	    self.FixTableHeadings()
	    self.RemoveTopHeadings()
	    html = str(self.soup)
	    html = self.RemoveVersionNumbers(html)
	    return html


	def main(args):
	  """Main entry for the doxy_cleanup utility

	  doxy_cleanup cleans up the html files generated by doxygen: it moves
	  everything out of the 'html' directory into its parent, deletes the files
	  matched by FILES_TO_REMOVE and rewrites each remaining .html file with the
	  output of HTMLFixer.

	  Args:
	    args: the command line arguments, without the program name.

	  Returns:
	    0 once all files have been processed.
	  """

	  parser = argparse.ArgumentParser(description=__doc__)
	  parser.add_argument('-v', '--verbose', help='verbose output.',
	                      action='store_true')
	  parser.add_argument('directory')

	  options = parser.parse_args(args)

	  if options.verbose:
	    Trace.verbose = True

	  root_dir = options.directory
	  html_dir = os.path.join(root_dir, 'html')

	  # Doxygen puts all files in an 'html' directory.
	  # First, move all files from that directory to root_dir.
	  for filename in glob.glob(os.path.join(html_dir, '*')):
	    Trace('Moving %s -> %s' % (filename, root_dir))
	    shutil.move(filename, root_dir)

	  # Now remove the 'html' directory.
	  Trace('Removing %s' % html_dir)
	  os.rmdir(html_dir)

	  # Then remove unneeded files.
	  for wildcard in FILES_TO_REMOVE:
	    Trace('Removing "%s":' % wildcard)
	    path = os.path.join(root_dir, wildcard)
	    for filename in glob.glob(path):
	      Trace(' Removing "%s"' % filename)
	      os.remove(filename)

	  # Now, fix the HTML files we've kept.
	  Trace('Fixing HTML files...')
	  for root, _, files in os.walk(root_dir):
	    for filename in files:
	      if os.path.splitext(filename)[1] != '.html':
	        Trace('Skipping %s' % filename)
	        continue

	      filename = os.path.join(root, filename)
	      Trace('Processing "%s"...' % filename)
	      try:
	        with open(filename) as f:
	          html = f.read()

	        fixer = HTMLFixer(html)
	        output = fixer.FixAll()
	        with open(filename, 'w') as f:
	          f.write(output)
	      except:
	        # Name the offending file, then let the error propagate unchanged.
	        sys.stderr.write("Error while processing %s\n" % filename)
	        raise

	  return 0

	if __name__ == '__main__':
	  # Exit with main()'s return value; an interrupt is reported on stderr and
	  # turned into exit status 1.
	  try:
	    rtn = main(sys.argv[1:])
	  except KeyboardInterrupt:
	    sys.stderr.write('%s: interrupted\n' % os.path.basename(__file__))
	    rtn = 1
	  sys.exit(rtn)