| #!/usr/bin/env python |
| # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| '''This utility cleans up the html files as emitted by doxygen so |
| that they are suitable for publication on a Google documentation site. |
| ''' |
| |
| import argparse |
| import glob |
| import os |
| import re |
| import shutil |
| import sys |
| |
# BeautifulSoup is imported from a top-level `BeautifulSoup` module (the
# message below refers to a BeautifulSoup.py file). If the import fails, print
# installation guidance and then re-raise so the original error and traceback
# are still reported.
try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  print ("This tool requires the BeautifulSoup package "
         "(see http://www.crummy.com/software/BeautifulSoup/).\n"
         "Make sure that the file BeautifulSoup.py is either in this directory "
         "or is available in your PYTHON_PATH")
  raise
| |
| |
def Trace(msg):
  '''Writes str(msg) followed by a newline to stderr when tracing is enabled.

  Tracing is controlled by the function attribute Trace.verbose; when it is
  false the call does nothing.
  '''
  if not Trace.verbose:
    return
  line = str(msg) + '\n'
  sys.stderr.write(line)


# Tracing is off until something sets Trace.verbose to True.
Trace.verbose = False
| |
| |
# Glob patterns for files that main() deletes. Each pattern is joined onto
# the output directory and expanded with glob.glob(), so entries may be exact
# file names or wildcards.
FILES_TO_REMOVE = [
  '*.css',
  '*.map',
  '*.md5',
  'annotated.html',
  'bc_s.png',
  'classes.html',
  'closed.png',
  'doxygen.png',
  'files.html',
  'functions*.html',
  'globals_0x*.html',
  'globals_enum.html',
  'globals_eval.html',
  'globals_func.html',
  'globals.html',
  'globals_type.html',
  'globals_vars.html',
  'graph_legend.html',
  'graph_legend.png',
  'hierarchy.html',
  'index_8dox.html',
  'index.html',
  'modules.html',
  'namespacemembers_func.html',
  'namespacemembers.html',
  'namespaces.html',
  'nav_f.png',
  'nav_h.png',
  'open.png',
  'tab_a.png',
  'tab_b.png',
  'tab_h.png',
  'tab_s.png',
]
| |
| |
class HTMLFixer(object):
  '''Cleans up one HTML document as produced by Doxygen.

  The document is parsed once in the constructor. FixTableHeadings() and
  RemoveTopHeadings() edit the parsed tree in place; FixAll() runs both and
  returns the resulting HTML text with version suffixes stripped.
  '''

  def __init__(self, html):
    '''Parses html (the text of one page) with BeautifulSoup.'''
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    A row counts as a heading only when it contains <td><h2><a name="...">
    with a non-empty name. The name itself is not copied onto the new <h2>.

    For example, this html:
     <table>
       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
       Data Fields List</h2></td></tr>
       ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
       ...
     </table>
    '''

    table_headers = []
    for tag in self.soup.findAll('tr'):
      anchor = tag.td and tag.td.h2 and tag.td.h2.a
      # Use .get() rather than anchor['name']: indexing raises when the
      # attribute is missing, and an <a> inside a heading is not guaranteed
      # to carry a name (it may be a plain link).
      if anchor and anchor.get('name'):
        # Turn the <tr> itself into the <h2>, keeping only the heading text
        # (the node that follows the anchor).
        tag.string = anchor.next
        tag.name = 'h2'
        table_headers.append(tag)

    # Walk the headings last-to-first so that earlier tags don't delete
    # later tags.
    # Split up tables that have multiple table header (th) rows
    for tag in reversed(table_headers):
      Trace("Header tag: %s is %s" % (tag.name, tag.string.strip()))
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        Trace("Splitting Table named %s" % tag.string.strip())
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = table.contents.index(tag)
        # Iterate over a slice (a copy): inserting a row into new_table
        # removes it from the old table's contents.
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = table_parent.contents.index(table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute.

    The class must match exactly "header", "navpath", or "tabs" followed by
    optional digits (e.g. "tabs2").
    '''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class': re.compile('^(header|tabs[0-9]*|navpath)$')})
    for tag in header_tags:
      tag.extract()

  def RemoveVersionNumbers(self, html):
    '''Horrible hack to strip _#_# from struct names.

    Removes every "_<digit>_<digit>" that is immediately followed by a double
    quote, colon, space, or "<". Only single-digit components match.

    Args:
      html: the HTML text to process.

    Returns:
      The text with those suffixes removed.
    '''
    return re.sub(r'(_\d_\d)(?=[": <])', '', html)

  def FixAll(self):
    '''Applies every fix and returns the cleaned-up document as a string.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()
    html = str(self.soup)
    html = self.RemoveVersionNumbers(html)
    return html
| |
| |
def main(args):
  """Runs the doxy_cleanup utility on one doxygen output directory.

  Steps, in order:
    1. Move everything matched by <directory>/html/* up into <directory>.
    2. Remove <directory>/html with os.rmdir (so it must be empty by then).
    3. Delete every file matching a pattern in FILES_TO_REMOVE.
    4. Rewrite each remaining .html file under <directory> via HTMLFixer.

  Args:
    args: command-line arguments without the program name; accepts
        -v/--verbose and one positional directory.

  Returns:
    0 on completion. Failures propagate as exceptions.
  """
  arg_parser = argparse.ArgumentParser(description=__doc__)
  arg_parser.add_argument('-v', '--verbose', help='verbose output.',
                          action='store_true')
  arg_parser.add_argument('directory')
  options = arg_parser.parse_args(args)

  if options.verbose:
    Trace.verbose = True

  root_dir = options.directory
  html_dir = os.path.join(root_dir, 'html')

  # Doxygen puts all files in an 'html' directory; hoist them into root_dir.
  for entry in glob.glob(os.path.join(html_dir, '*')):
    Trace('Moving %s -> %s' % (entry, root_dir))
    shutil.move(entry, root_dir)

  # The 'html' directory itself is no longer needed.
  Trace('Removing %s' % html_dir)
  os.rmdir(html_dir)

  # Drop the generated files we do not want to keep.
  for pattern in FILES_TO_REMOVE:
    Trace('Removing "%s":' % pattern)
    for unwanted in glob.glob(os.path.join(root_dir, pattern)):
      Trace(' Removing "%s"' % unwanted)
      os.remove(unwanted)

  # Finally, clean up every HTML file that is left.
  Trace('Fixing HTML files...')
  for root, _, files in os.walk(root_dir):
    for basename in files:
      extension = os.path.splitext(basename)[1]
      if extension != '.html':
        Trace('Skipping %s' % basename)
        continue

      html_path = os.path.join(root, basename)
      Trace('Processing "%s"...' % html_path)
      try:
        with open(html_path) as infile:
          original = infile.read()

        cleaned = HTMLFixer(original).FixAll()
        with open(html_path, 'w') as outfile:
          outfile.write(cleaned)
      except:
        # Deliberately broad: name the offending file for any failure, then
        # let the original exception continue unchanged.
        sys.stderr.write("Error while processing %s\n" % html_path)
        raise

  return 0
| |
# Script entry point: exit with main()'s return value, or with 1 after
# reporting a Ctrl-C.
if __name__ == '__main__':
  try:
    sys.exit(main(sys.argv[1:]))
  except KeyboardInterrupt:
    sys.stderr.write('%s: interrupted\n' % os.path.basename(__file__))
    sys.exit(1)