blob: 01b1ca07d4e72d00eae19d4f5a161123b346501d [file] [log] [blame]
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
import optparse
import os
import re
import shutil
import string
import sys
from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
print ("This tool requires the BeautifulSoup package "
"Make sure that the file is either in this directory "
"or is available in your PYTHON_PATH")
class HTMLFixer(object):
'''This class cleans up the html strings as produced by Doxygen
def __init__(self, html):
self.soup = BeautifulSoup(html)
def FixTableHeadings(self):
'''Fixes the doxygen table headings.
This includes:
- Using bare <h2> title row instead of row embedded in <tr><td> in table
- Putting the "name" attribute into the "id" attribute of the <tr> tag.
- Splitting up tables into multiple separate tables if a table
heading appears in the middle of a table.
For example, this html:
<tr><td colspan="2"><h2><a name="pub-attribs"></a>
Data Fields List</h2></td></tr>
would be converted to this:
<h2>Data Fields List</h2>
table_headers = []
for tag in self.soup.findAll('tr'):
if and and and['name']:
#tag['id'] =['name']
tag.string = = 'h2'
# reverse the list so that earlier tags don't delete later tags
# Split up tables that have multiple table header (th) rows
for tag in table_headers:
print "Header tag: %s is %s" % (, tag.string.strip())
# Is this a heading in the middle of a table?
if tag.findPreviousSibling('tr') and == 'table':
print "Splitting Table named %s" % tag.string.strip()
table = tag.parent
table_parent = table.parent
table_index = table_parent.contents.index(table)
new_table = Tag(self.soup, name='table', attrs=table.attrs)
table_parent.insert(table_index + 1, new_table)
tag_index = table.contents.index(tag)
for index, row in enumerate(table.contents[tag_index:]):
new_table.insert(index, row)
# Now move the <h2> tag to be in front of the <table> tag
assert == 'table'
table = tag.parent
table_parent = table.parent
table_index = table_parent.contents.index(table)
table_parent.insert(table_index, tag)
def RemoveTopHeadings(self):
'''Removes <div> sections with a header, tabs, or navpath class attribute'''
header_tags = self.soup.findAll(
attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
[tag.extract() for tag in header_tags]
def FixAll(self):
def __str__(self):
return str(self.soup)
def main():
'''Main entry for the doxy_cleanup utility
doxy_cleanup takes a list of html files and modifies them in place.'''
parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
parser.add_option('-m', '--move', dest='move', action='store_true',
default=False, help='move html files to "original_html"')
options, files = parser.parse_args()
if not files:
return 1
for filename in files:
with open(filename, 'r') as file:
html =
print "Processing %s" % filename
fixer = HTMLFixer(html)
with open(filename, 'w') as file:
if options.move:
new_directory = os.path.join(
os.path.dirname(os.path.dirname(filename)), 'original_html')
if not os.path.exists(new_directory):
shutil.move(filename, new_directory)
print "Error while processing %s" % filename
return 0
if __name__ == '__main__':