ppapi/cpp/documentation/doxy_cleanup.py - chromium/src - Git at Google

 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 '''This utility cleans up the html files as emitted by doxygen so
 that they are suitable for publication on a Google documentation site.
 '''

 import optparse
 import os
 import re
 import shutil
 import string
 import sys
 try:
   from BeautifulSoup import BeautifulSoup, Tag
 except (ImportError, NotImplementedError):
   print ("This tool requires the BeautifulSoup package "
          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
          "Make sure that the file BeautifulSoup.py is either in this directory "
          "or is available in your PYTHON_PATH")
   raise


 class HTMLFixer(object):
   '''This class cleans up the html strings as produced by Doxygen
   '''

   def __init__(self, html):
     self.soup = BeautifulSoup(html)

   def FixTableHeadings(self):
     '''Fixes the doxygen table headings.

     This includes:
       - Using bare <h2> title row instead of row embedded in <tr><td> in table
       - Putting the "name" attribute into the "id" attribute of the <tr> tag.
       - Splitting up tables into multiple separate tables if a table
         heading appears in the middle of a table.

     For example, this html:
      <table>
       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
       Data Fields List</h2></td></tr>
       ...
      </table>

     would be converted to this:
      <h2>Data Fields List</h2>
      <table>
       ...
      </table>
     '''

     table_headers = []
     for tag in self.soup.findAll('tr'):
       if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
         #tag['id'] = tag.td.h2.a['name']
         tag.string = tag.td.h2.a.next
         tag.name = 'h2'
         table_headers.append(tag)

     # reverse the list so that earlier tags don't delete later tags
     table_headers.reverse()
     # Split up tables that have multiple table header (th) rows
     for tag in table_headers:
       print "Header tag: %s is %s" % (tag.name, tag.string.strip())
       # Is this a heading in the middle of a table?
       if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
         print "Splitting Table named %s" % tag.string.strip()
         table = tag.parent
         table_parent = table.parent
         table_index = table_parent.contents.index(table)
         new_table = Tag(self.soup, name='table', attrs=table.attrs)
         table_parent.insert(table_index + 1, new_table)
         tag_index = table.contents.index(tag)
         for index, row in enumerate(table.contents[tag_index:]):
           new_table.insert(index, row)
       # Now move the <h2> tag to be in front of the <table> tag
       assert tag.parent.name == 'table'
       table = tag.parent
       table_parent = table.parent
       table_index = table_parent.contents.index(table)
       table_parent.insert(table_index, tag)

   def RemoveTopHeadings(self):
     '''Removes <div> sections with a header, tabs, or navpath class attribute'''
     header_tags = self.soup.findAll(
         name='div',
         attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
     [tag.extract() for tag in header_tags]

   def FixAll(self):
     self.FixTableHeadings()
     self.RemoveTopHeadings()

   def __str__(self):
     return str(self.soup)


 def main():
   '''Main entry for the doxy_cleanup utility

   doxy_cleanup takes a list of html files and modifies them in place.'''

   parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

   parser.add_option('-m', '--move', dest='move', action='store_true',
                     default=False, help='move html files to "original_html"')

   options, files = parser.parse_args()

   if not files:
     parser.print_usage()
     return 1

   for filename in files:
     try:
       with open(filename, 'r') as file:
         html = file.read()

       print "Processing %s" % filename
       fixer = HTMLFixer(html)
       fixer.FixAll()
       with open(filename, 'w') as file:
         file.write(str(fixer))
       if options.move:
         new_directory = os.path.join(
             os.path.dirname(os.path.dirname(filename)), 'original_html')
         if not os.path.exists(new_directory):
           os.mkdir(new_directory)
         shutil.move(filename, new_directory)
     except:
       print "Error while processing %s" % filename
       raise

   return 0


 if __name__ == '__main__':
   sys.exit(main())
	#!/usr/bin/env python
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	'''This utility cleans up the html files as emitted by doxygen so
	that they are suitable for publication on a Google documentation site.
	'''

	import optparse
	import os
	import re
	import shutil
	import string
	import sys
	try:
	from BeautifulSoup import BeautifulSoup, Tag
	except (ImportError, NotImplementedError):
	print ("This tool requires the BeautifulSoup package "
	"(see http://www.crummy.com/software/BeautifulSoup/).\n"
	"Make sure that the file BeautifulSoup.py is either in this directory "
	"or is available in your PYTHON_PATH")
	raise


	class HTMLFixer(object):
	'''This class cleans up the html strings as produced by Doxygen
	'''

	def __init__(self, html):
	self.soup = BeautifulSoup(html)

	def FixTableHeadings(self):
	'''Fixes the doxygen table headings.

	This includes:
	- Using bare <h2> title row instead of row embedded in <tr><td> in table
	- Putting the "name" attribute into the "id" attribute of the <tr> tag.
	- Splitting up tables into multiple separate tables if a table
	heading appears in the middle of a table.

	For example, this html:
	<table>
	<tr><td colspan="2"><h2><a name="pub-attribs"></a>
	Data Fields List</h2></td></tr>
	...
	</table>

	would be converted to this:
	<h2>Data Fields List</h2>
	<table>
	...
	</table>
	'''

	table_headers = []
	for tag in self.soup.findAll('tr'):
	if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
	#tag['id'] = tag.td.h2.a['name']
	tag.string = tag.td.h2.a.next
	tag.name = 'h2'
	table_headers.append(tag)

	# reverse the list so that earlier tags don't delete later tags
	table_headers.reverse()
	# Split up tables that have multiple table header (th) rows
	for tag in table_headers:
	print "Header tag: %s is %s" % (tag.name, tag.string.strip())
	# Is this a heading in the middle of a table?
	if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
	print "Splitting Table named %s" % tag.string.strip()
	table = tag.parent
	table_parent = table.parent
	table_index = table_parent.contents.index(table)
	new_table = Tag(self.soup, name='table', attrs=table.attrs)
	table_parent.insert(table_index + 1, new_table)
	tag_index = table.contents.index(tag)
	for index, row in enumerate(table.contents[tag_index:]):
	new_table.insert(index, row)
	# Now move the <h2> tag to be in front of the <table> tag
	assert tag.parent.name == 'table'
	table = tag.parent
	table_parent = table.parent
	table_index = table_parent.contents.index(table)
	table_parent.insert(table_index, tag)

	def RemoveTopHeadings(self):
	'''Removes <div> sections with a header, tabs, or navpath class attribute'''
	header_tags = self.soup.findAll(
	name='div',
	attrs={'class' : re.compile('^(header\|tabs[0-9]*\|navpath)$')})
	[tag.extract() for tag in header_tags]

	def FixAll(self):
	self.FixTableHeadings()
	self.RemoveTopHeadings()

	def __str__(self):
	return str(self.soup)


	def main():
	'''Main entry for the doxy_cleanup utility

	doxy_cleanup takes a list of html files and modifies them in place.'''

	parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

	parser.add_option('-m', '--move', dest='move', action='store_true',
	default=False, help='move html files to "original_html"')

	options, files = parser.parse_args()

	if not files:
	parser.print_usage()
	return 1

	for filename in files:
	try:
	with open(filename, 'r') as file:
	html = file.read()

	print "Processing %s" % filename
	fixer = HTMLFixer(html)
	fixer.FixAll()
	with open(filename, 'w') as file:
	file.write(str(fixer))
	if options.move:
	new_directory = os.path.join(
	os.path.dirname(os.path.dirname(filename)), 'original_html')
	if not os.path.exists(new_directory):
	os.mkdir(new_directory)
	shutil.move(filename, new_directory)
	except:
	print "Error while processing %s" % filename
	raise

	return 0


	if __name__ == '__main__':
	sys.exit(main())