ppapi/c/documentation/html2ezt.py - chromium/src.git - Git at Google

 #!/usr/bin/python

 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 '''This utility converts the html files as emitted by doxygen into ezt files
 that are suitable for inclusion into Google code site.

 EZT stands for "EaZy Templating (for Python)".  For more information, see
 http://code.google.com/p/ezt/
 '''

 import optparse
 import os
 import re
 import shutil
 import string
 import sys
 try:
   # BeautifulSoup is a third-party package; it supplies both the parser and
   # the Tag class that EZTFixer uses.
   from BeautifulSoup import BeautifulSoup, Tag
 except (ImportError, NotImplementedError):
   # Print an installation hint, then re-raise so the original traceback is
   # still reported and the script stops.
   print ("This tool requires the BeautifulSoup package "
          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
          "Make sure that the file BeautifulSoup.py is either in this directory "
          "or is available in your PYTHONPATH")
   raise


 class EZTFixer(object):
   '''Converts the html strings as produced by Doxygen into ezt strings as
   used by the Google code site tools.

   The html is parsed once by the constructor.  The Fix*/Remove* methods then
   edit the parsed tree (self.soup) in place, and str() of an instance returns
   str() of that tree.
   '''

   def __init__(self, html):
     '''Parses |html| (a string of html) into self.soup.'''
     self.soup = BeautifulSoup(html)

   def FixTableHeadings(self):
     '''Fixes the doxygen table headings to EZT's liking.

     This includes using <th> instead of <h2> for the heading, and putting
     the "name" attribute into the "id" attribute of the <tr> tag.

     For example, this html:
       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
       Data Fields List</h2></td></tr>

     would be converted to this:
       <tr id="pub-attribs"><th colspan="2">Data Fields List</th></tr>

     Rows whose <a> tag has no (or an empty) "name" attribute are left
     untouched.

     Also, this function splits up tables into multiple separate tables if
     a table heading appears in the middle of a table.
     '''

     table_headers = []
     for row in self.soup.findAll('tr'):
       cell = row.td
       if not (cell and cell.h2 and cell.h2.a):
         continue
       anchor = cell.h2.a
       # Use get() rather than anchor['name']: indexing raises KeyError for an
       # anchor that carries no "name" attribute (e.g. an ordinary href link).
       name = anchor.get('name')
       if not name:
         continue
       row['id'] = name
       # anchor.next is read before the cell's content is replaced.
       cell.string = anchor.next
       cell.name = 'th'
       table_headers.append(row)

     # Split up tables that have multiple table header (th) rows.  Walk the
     # headers last-to-first so that earlier tags don't delete later tags.
     for row in reversed(table_headers):
       # Is this a heading in the middle of a table?
       if row.findPreviousSibling('tr') and row.parent.name == 'table':
         table = row.parent
         table_parent = table.parent
         table_index = table_parent.contents.index(table)
         new_table = Tag(self.soup, name='table', attrs=table.attrs)
         table_parent.insert(table_index + 1, new_table)
         # The heading row and everything after it move to the new table by
         # slicing the contents lists directly.
         row_index = table.contents.index(row)
         new_table.contents = table.contents[row_index:]
         del table.contents[row_index:]

   def RemoveTopHeadings(self):
     '''Removes <div> sections with a header, tabs, or navpath class attribute'''
     header_tags = self.soup.findAll(
         name='div',
         attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
     # A plain loop: extract() is called only for its side effect.
     for div in header_tags:
       div.extract()

   def FixAll(self):
     '''Applies every fix: table headings first, then top-heading removal.'''
     self.FixTableHeadings()
     self.RemoveTopHeadings()

   def __str__(self):
     '''Returns str() of the (possibly edited) parsed tree.'''
     return str(self.soup)


 def main():
   '''Main entry for the html2ezt utility.

   For every html file named on the command line, writes an ezt file with the
   same basename into the same directory.  A name ending in ".html" has that
   suffix replaced by ".ezt"; any other name gets ".ezt" appended, so the
   output never overwrites the input file.

   With -m/--move, each processed html file is then moved into a directory
   named "original_html" located one level above the directory that holds the
   html file (the directory is created if it does not exist).

   Returns:
     1 if no files were given (after printing the usage message), else 0.

   Raises:
     Any exception raised while processing a file is re-raised after the name
     of the offending file has been written to stderr.
   '''

   parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

   parser.add_option('-m', '--move', dest='move', action='store_true',
                     default=False, help='move html files to "original_html"')

   options, files = parser.parse_args()

   if not files:
     parser.print_usage()
     return 1

   html_suffix = '.html'
   for filename in files:
     try:
       with open(filename, 'r') as html_file:
         html = html_file.read()

       fixer = EZTFixer(html)
       fixer.FixAll()
       if filename.endswith(html_suffix):
         new_name = filename[:-len(html_suffix)] + '.ezt'
       else:
         # Without the suffix a plain substitution would leave the name
         # unchanged and the output would clobber the input file.
         new_name = filename + '.ezt'
       with open(new_name, 'w') as ezt_file:
         ezt_file.write(str(fixer))
       if options.move:
         new_directory = os.path.join(
             os.path.dirname(os.path.dirname(filename)), 'original_html')
         if not os.path.exists(new_directory):
           os.mkdir(new_directory)
         shutil.move(filename, new_directory)
     except Exception:
       # Say which file failed, then let the original exception propagate.
       sys.stderr.write('Error while processing %s\n' % filename)
       raise

   return 0

 # Run main() only when this file is executed as a script, and hand its return
 # value to sys.exit() as the process exit status.
 if __name__ == '__main__':
   sys.exit(main())
	#!/usr/bin/python

	# Copyright (c) 2011 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	'''This utility converts the html files as emitted by doxygen into ezt files
	that are suitable for inclusion into Google code site.

	EZT stands for "EaZy Templating (for Python)". For more information, see
	http://code.google.com/p/ezt/
	'''

	import optparse
	import os
	import re
	import shutil
	import string
	import sys
	try:
	  # BeautifulSoup is a third-party package; it supplies both the parser and
	  # the Tag class that EZTFixer uses.
	  from BeautifulSoup import BeautifulSoup, Tag
	except (ImportError, NotImplementedError):
	  # Print an installation hint, then re-raise so the original traceback is
	  # still reported and the script stops.
	  print ("This tool requires the BeautifulSoup package "
	         "(see http://www.crummy.com/software/BeautifulSoup/).\n"
	         "Make sure that the file BeautifulSoup.py is either in this directory "
	         "or is available in your PYTHONPATH")
	  raise


	class EZTFixer(object):
	  '''Converts the html strings as produced by Doxygen into ezt strings as
	  used by the Google code site tools.

	  The html is parsed once by the constructor.  The Fix*/Remove* methods then
	  edit the parsed tree (self.soup) in place, and str() of an instance returns
	  str() of that tree.
	  '''

	  def __init__(self, html):
	    '''Parses |html| (a string of html) into self.soup.'''
	    self.soup = BeautifulSoup(html)

	  def FixTableHeadings(self):
	    '''Fixes the doxygen table headings to EZT's liking.

	    This includes using <th> instead of <h2> for the heading, and putting
	    the "name" attribute into the "id" attribute of the <tr> tag.

	    For example, this html:
	      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
	      Data Fields List</h2></td></tr>

	    would be converted to this:
	      <tr id="pub-attribs"><th colspan="2">Data Fields List</th></tr>

	    Rows whose <a> tag has no (or an empty) "name" attribute are left
	    untouched.

	    Also, this function splits up tables into multiple separate tables if
	    a table heading appears in the middle of a table.
	    '''

	    table_headers = []
	    for row in self.soup.findAll('tr'):
	      cell = row.td
	      if not (cell and cell.h2 and cell.h2.a):
	        continue
	      anchor = cell.h2.a
	      # Use get() rather than anchor['name']: indexing raises KeyError for an
	      # anchor that carries no "name" attribute (e.g. an ordinary href link).
	      name = anchor.get('name')
	      if not name:
	        continue
	      row['id'] = name
	      # anchor.next is read before the cell's content is replaced.
	      cell.string = anchor.next
	      cell.name = 'th'
	      table_headers.append(row)

	    # Split up tables that have multiple table header (th) rows.  Walk the
	    # headers last-to-first so that earlier tags don't delete later tags.
	    for row in reversed(table_headers):
	      # Is this a heading in the middle of a table?
	      if row.findPreviousSibling('tr') and row.parent.name == 'table':
	        table = row.parent
	        table_parent = table.parent
	        table_index = table_parent.contents.index(table)
	        new_table = Tag(self.soup, name='table', attrs=table.attrs)
	        table_parent.insert(table_index + 1, new_table)
	        # The heading row and everything after it move to the new table by
	        # slicing the contents lists directly.
	        row_index = table.contents.index(row)
	        new_table.contents = table.contents[row_index:]
	        del table.contents[row_index:]

	  def RemoveTopHeadings(self):
	    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
	    # The alternation must use bare "|" characters; an escaped "\|" would
	    # match a literal pipe and no div would ever be selected.
	    header_tags = self.soup.findAll(
	        name='div',
	        attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
	    # A plain loop: extract() is called only for its side effect.
	    for div in header_tags:
	      div.extract()

	  def FixAll(self):
	    '''Applies every fix: table headings first, then top-heading removal.'''
	    self.FixTableHeadings()
	    self.RemoveTopHeadings()

	  def __str__(self):
	    '''Returns str() of the (possibly edited) parsed tree.'''
	    return str(self.soup)


	def main():
	  '''Main entry for the html2ezt utility.

	  For every html file named on the command line, writes an ezt file with the
	  same basename into the same directory.  A name ending in ".html" has that
	  suffix replaced by ".ezt"; any other name gets ".ezt" appended, so the
	  output never overwrites the input file.

	  With -m/--move, each processed html file is then moved into a directory
	  named "original_html" located one level above the directory that holds the
	  html file (the directory is created if it does not exist).

	  Returns:
	    1 if no files were given (after printing the usage message), else 0.

	  Raises:
	    Any exception raised while processing a file is re-raised after the name
	    of the offending file has been written to stderr.
	  '''

	  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

	  parser.add_option('-m', '--move', dest='move', action='store_true',
	                    default=False, help='move html files to "original_html"')

	  options, files = parser.parse_args()

	  if not files:
	    parser.print_usage()
	    return 1

	  html_suffix = '.html'
	  for filename in files:
	    try:
	      with open(filename, 'r') as html_file:
	        html = html_file.read()

	      fixer = EZTFixer(html)
	      fixer.FixAll()
	      if filename.endswith(html_suffix):
	        new_name = filename[:-len(html_suffix)] + '.ezt'
	      else:
	        # Without the suffix a plain substitution would leave the name
	        # unchanged and the output would clobber the input file.
	        new_name = filename + '.ezt'
	      with open(new_name, 'w') as ezt_file:
	        ezt_file.write(str(fixer))
	      if options.move:
	        new_directory = os.path.join(
	            os.path.dirname(os.path.dirname(filename)), 'original_html')
	        if not os.path.exists(new_directory):
	          os.mkdir(new_directory)
	        shutil.move(filename, new_directory)
	    except Exception:
	      # Say which file failed, then let the original exception propagate.
	      sys.stderr.write('Error while processing %s\n' % filename)
	      raise

	  return 0

	# Run main() only when this file is executed as a script, and hand its return
	# value to sys.exit() as the process exit status.
	if __name__ == '__main__':
	  sys.exit(main())