tools/lou_harnessGenerator - external/liblouis-github - Git at Google

 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Liblouis test harness generator
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Library General Public
 # License as published by the Free Software Foundation; either
 # version 2 of the License, or (at your option) any later version.
 #
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Library General Public License for more details.
 #
 # You should have received a copy of the GNU Library General Public
 # License along with this library; if not, write to the
 # Free Software Foundation, Inc., Franklin Street, Fifth Floor,
 # Boston MA  02110-1301 USA.
 #
 # Copyright (c) 2012, Hammer Attila, Mesar Hameed.

 """Liblouis test harness generator:
 This is a tool to help to generate harness files.
 You are expected to read the generated output, paying special attention to the "input" and "output" lines, and correct the fields, according to
 the formal braille standard for your table.
 The reason why they might contain errors is because the liblouis tables may not be conforming with the formal braille standard, and the purpose
 of the harness files is to catch these edge cases so that the liblouis tables can be improved.
 For input examples please have a look at the files in the tests/harnessSources directory at the top of the Liblouis source tree.

 @author: Hammer Attila <hammer.attila@infoalap.hu>
 @author: Mesar Hameed <mesar.hameed@gmail.com>
 """

 import os, sys, argparse, json, codecs
 from louis import translate, hyphenate
 from louis import noContractions, compbrlAtCursor, dotsIO, comp8Dots, pass1Only, compbrlLeftCursor, otherTrans, ucBrl


 # True when the running interpreter is Python 2.
 PY2 = (2 == sys.version_info[0])

 # Maps the mode names accepted in harness sources to the liblouis mode
 # constants; the empty name means "no mode set" (0).
 modes = dict([
     ('', 0),
     ('noContractions', noContractions),
     ('compbrlAtCursor', compbrlAtCursor),
     ('dotsIO', dotsIO),
     ('comp8Dots', comp8Dots),
     ('pass1Only', pass1Only),
     ('compbrlLeftCursor', compbrlLeftCursor),
     ('otherTrans', otherTrans),
     ('ucBrl', ucBrl),
 ])

 # The test modes a harness source may select; every name maps to itself.
 test_types = dict((name, name) for name in ('translate', 'backtranslate', 'hyphenate'))

 # all the possible flags that a testcase can override
 possibleFlags = [
     'brlCursorPos',
     'cursorPos',
     'mode',
     'testmode',
     'comment',
 ]

 # Temporary hack, translation table search paths.
 possible_table_paths = [
     "tables",
     "tests/tables",
     "/usr/share/liblouis/tables",
     "/usr/local/share/liblouis/tables",
 ]


 class HarnessError(Exception):
     """Raised by the generator to gracefully process known exceptions.

     The error text is exposed through the ``message`` attribute on both
     Python 2 and Python 3, so handlers that read ``e.message`` keep working
     (Python 3 exceptions do not provide that attribute by themselves).
     """

     def __init__(self, *args):
         Exception.__init__(self, *args)
         # Same rule Python 2 applied: the sole argument, otherwise empty.
         self.message = args[0] if len(args) == 1 else ''

 # Add support for command line arguments.
 parser = argparse.ArgumentParser(description='Test harness file generator')
 parser.add_argument('-u', action="store_true", dest="unicodebraille", default=False, help='Use unicode braille for output.')
 parser.add_argument('-d', action='store', default='', dest="directory", help="Output directory for the generated harness file(s).")
 parser.add_argument('infiles', action='store', nargs='+', help='Filenames where the source text is stored, wildcards are supported.')
 args = parser.parse_args()

 # Check if output directory command line option was given
 # and if so make sure path ends with the / character, because the output
 # file name is later built by plain string concatenation.
 if args.directory:
     if not args.directory.endswith("/"):
         args.directory += "/"
     # Make sure output directory exists.
     if not os.path.exists(args.directory):
         try:
             os.makedirs(args.directory)
         except OSError as e:
             # strerror can be None for some OSError instances; fall back to str(e).
             reason = (e.strerror or str(e)).lower()
             print("Error: unable to create {dir} {msg}.\nHarness generation aborted.".format(dir=args.directory, msg=reason))
             # Exit with a non-zero status so callers can detect the failure.
             sys.exit(1)


 def check_tables_exists(headerLine):
     """Turn the header line of a harness source into a list of table names.

     The header line must start with "tables: ". Commas are dropped and the
     remainder is split on whitespace. When unicode braille output was
     requested on the command line, "unicode.dis" is placed first unless it
     is already listed.

     Returns the list of table names.
     Raises HarnessError when the header is missing, or when a table exists
     in none of the directories listed in possible_table_paths.
     """
     if not headerLine.startswith("tables: "):
         raise HarnessError("Header line doesn't declare which translation tables should be used.")

     header = headerLine.strip('\n')
     header = header.replace("tables: ", "")
     tableNames = header.replace(",", "").split()

     if args.unicodebraille and "unicode.dis" not in tableNames:
         tableNames.insert(0, "unicode.dis")

     for name in tableNames:
         # A table only has to exist in one of the search directories.
         if not any(os.path.exists(directory + '/' + name) for directory in possible_table_paths):
             raise HarnessError("Unable to locate translation table '%s'." % name)
     return tableNames

 def hyphenateword(word, tables, mode):
     """Return word with "-" inserted before each hyphenation point.

     word is the text to hyphenate, tables the translation table list and
     mode the liblouis mode value; all three are handed to hyphenate().
     A '1' in the returned mask marks a position that gets a "-" in front
     of the corresponding character of word.

     Raises HarnessError when hyphenate() raises RuntimeError.
     """
     try:
         # FIXME: liblouis currently crashes if we dont add space at end of the word, probably due to a counter running past the end of the string.
         # medium/longterm this hack should be removed, and the root of the problem found/resolved.
         hyphen_mask = hyphenate(tables, word + ' ', mode)
     except RuntimeError as e:
         # e.message only exists on Python 2; str(e) works on both versions.
         raise HarnessError('Hyphenation not possible, %s.' % str(e))

     # The mask may be longer than word because of the padding space above.
     # zip() stops at the shorter sequence on Python 2 and 3 alike, so no
     # version-specific trimming is required.
     return "".join("-" + char if flag == '1' else char for char, flag in zip(word, hyphen_mask))


 def processHarnessSource(sourceFileName):
     """Generate a harness file from one harness source file.

     The first line of the source must be the "tables: ..." header. Each
     later non-empty line is one of:
       * a "Comment: " / "comment: " line, collected into the comment list
         of the next testcase,
       * a flags line ("brlCursorPos: 2 cursorPos: 4 mode: compbrlAtCursor",
         "testmode: hyphenate", ...) that applies to the next testcase,
       * the text of a testcase.
     The resulting harness is written as JSON into args.directory, named
     after the source file with '_source' removed, and a summary is printed.

     Raises HarnessError when the source cannot be read, is empty, has a
     malformed flags line, cannot be translated, yields no testcases, or
     when the output file cannot be written.
     """
     try:
         with codecs.open(sourceFileName, 'r', 'utf-8') as f:
             lines = f.readlines()
     except IOError as e:
         raise HarnessError("Reading {file}, {msg}.".format(file=e.filename, msg=e.strerror.lower()))
     if not lines:
         # Without this guard an empty file would surface as a bare IndexError.
         raise HarnessError("Source file is empty.")
     # process headerLine which should contain translation table information.
     translationTables = check_tables_exists(lines[0])
     del lines[0]

     # Initialize harness data structure:
     harness = {'tables': translationTables, 'tests': []}
     translate_tests = {'data': []}
     backtranslate_tests = {'flags': {'testmode': 'backtranslate'}, 'data': []}
     hyphenate_tests = {'flags': {'testmode': 'hyphenate'}, 'data': []}
     if args.unicodebraille:
         backtranslate_tests['flags']['outputUniBrl'] = args.unicodebraille
         hyphenate_tests['flags']['outputUniBrl'] = args.unicodebraille
         translate_tests['flags'] = {'outputUniBrl': args.unicodebraille}

     testcase = {}
     # Default testmode is to translate, unless specifically overridden.
     testmode = 'translate'

     # Process the remaining lines of the harness source. Numbering starts
     # at 2 because the header was line 1.
     for lineNumber, rawLine in enumerate(lines, 2):
         line = rawLine.rstrip('\r\n')
         if line == '':
             continue

         # The first comment line of a testcase creates the comment list; a
         # long comment split over several lines is appended to it.
         if line.startswith(('Comment: ', 'comment: ')):
             testcase.setdefault('comment', []).append(line[len('Comment: '):])
             continue

         # Does this line contain any processing flags
         # example:
         # brlCursorPos: 2 cursorPos: 4 mode: compbrlAtCursor
         fields = line.split(': ')
         if any(flag in fields for flag in possibleFlags):
             optionlist = line.split(' ')
             if len(optionlist) % 2:
                 # Flags come in "name: value" pairs; an odd token count
                 # would otherwise end in an IndexError.
                 raise HarnessError("Malformed flags on line {number}.".format(number=lineNumber))
             for pos in range(0, len(optionlist), 2):
                 name = optionlist[pos].replace(":", "")
                 value = optionlist[pos + 1].replace(",", "")
                 if name in ("brlCursorPos", "cursorPos"):
                     try:
                         testcase[name] = int(value)
                     except ValueError:
                         raise HarnessError("Invalid value for {flag} on line {number}.".format(flag=name, number=lineNumber))
                 elif name == 'testmode' and value in test_types:
                     testmode = value
                 elif name == 'mode' and value in modes:
                     testcase['mode'] = value
             continue

         # What liblouis translation mode bits should be set
         mode = modes[testcase.get('mode', '')]
         cursorPos = testcase.get('cursorPos', 0)

         # By now we have finished processing testcase options and we know
         # what we want to do for this testcase.
         # So depending on testmode, do the needed work.
         try:
             brl = translate(translationTables, line, None, cursorPos, mode)[0]
         except RuntimeError as e:
             # e.message only exists on Python 2; str(e) works on both versions.
             raise HarnessError("translate impossible, a problem occured %s." % str(e))
         # NOTE(review): the search pattern shows up as an empty string in this
         # copy of the file, which would insert the replacement between every
         # character. Presumably it originally held a literal DEL (U+007F)
         # character; it is spelled as an escape here - confirm against upstream.
         brl = brl.replace("\x7f", "\u007f")

         if testmode == 'translate':
             testcase['input'] = line
             testcase['output'] = brl
             translate_tests['data'].append(testcase)
         elif testmode == 'backtranslate':
             testcase['input'] = brl
             testcase['output'] = line
             backtranslate_tests['data'].append(testcase)
         elif testmode == 'hyphenate':
             testcase['input'] = line
             testcase['output'] = hyphenateword(line, translationTables, mode)
             hyphenate_tests['data'].append(testcase)

         # Flags and comments only apply to a single testcase, so start over.
         testcase = {}
         # Default testmode is to translate, unless specifically overridden.
         testmode = 'translate'

     # Harness source has been processed, prepare to output harness file.
     generatedTestcases = len(backtranslate_tests['data']) + len(translate_tests['data']) + len(hyphenate_tests['data'])
     # Only groups that actually received testcases go into the harness.
     for group in (backtranslate_tests, translate_tests, hyphenate_tests):
         if group['data']:
             harness['tests'].append(group)

     if not generatedTestcases:
         raise HarnessError('No testcases generated.')
     outfile = args.directory + os.path.basename(sourceFileName).replace('_source', '')
     try:
         with codecs.open(outfile, 'w', 'utf-8') as f:
             json.dump(harness, f, indent=2, sort_keys=True, ensure_ascii=False)
     except IOError as e:
         raise HarnessError("Writeing {file}, {msg}.".format(file=e.filename, msg=e.strerror.lower()))
     else:
         print("{file}: {number} testcases, please carefully inspect and apply any manual corrections.".format(file=outfile, number=generatedTestcases))

 # Begin processing input files
 for infile in args.infiles:
     try:
         processHarnessSource(infile)
     except HarnessError as e:
         # e.message only exists on Python 2; the first constructor argument
         # carries the same text on every Python version.
         reason = e.args[0] if e.args else ''
         print("Error: while processing {source}. {msg}".format(source=infile, msg=reason))
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	# Liblouis test harness generator
	#
	# This library is free software; you can redistribute it and/or
	# modify it under the terms of the GNU Library General Public
	# License as published by the Free Software Foundation; either
	# version 2 of the License, or (at your option) any later version.
	#
	# This library is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Library General Public License for more details.
	#
	# You should have received a copy of the GNU Library General Public
	# License along with this library; if not, write to the
	# Free Software Foundation, Inc., Franklin Street, Fifth Floor,
	# Boston MA 02110-1301 USA.
	#
	# Copyright (c) 2012, Hammer Attila, Mesar Hameed.

	"""Liblouis test harness generator:
	This is a tool to help to generate harness files.
	You are expected to read the generated output, paying special attention to the "input" and "output" lines, and correct the fields, according to
	the formal braille standard for your table.
	The reason why they might contain errors is because the liblouis tables may not be conforming with the formal braille standard, and the purpose
	of the harness files is to catch these edge cases so that the liblouis tables can be improved.
	For input examples please have a look at the files in the tests/harnessSources directory at the top of the Liblouis source tree.

	@author: Hammer Attila <hammer.attila@infoalap.hu>
	@author: Mesar Hameed <mesar.hameed@gmail.com>
	"""

	import os, sys, argparse, json, codecs
	from louis import translate, hyphenate
	from louis import noContractions, compbrlAtCursor, dotsIO, comp8Dots, pass1Only, compbrlLeftCursor, otherTrans, ucBrl


	# True when the running interpreter is Python 2.
	PY2 = (2 == sys.version_info[0])

	# Maps the mode names accepted in harness sources to the liblouis mode
	# constants; the empty name means "no mode set" (0).
	modes = dict([
	    ('', 0),
	    ('noContractions', noContractions),
	    ('compbrlAtCursor', compbrlAtCursor),
	    ('dotsIO', dotsIO),
	    ('comp8Dots', comp8Dots),
	    ('pass1Only', pass1Only),
	    ('compbrlLeftCursor', compbrlLeftCursor),
	    ('otherTrans', otherTrans),
	    ('ucBrl', ucBrl),
	])

	# The test modes a harness source may select; every name maps to itself.
	test_types = dict((name, name) for name in ('translate', 'backtranslate', 'hyphenate'))

	# all the possible flags that a testcase can override
	possibleFlags = [
	    'brlCursorPos',
	    'cursorPos',
	    'mode',
	    'testmode',
	    'comment',
	]

	# Temporary hack, translation table search paths.
	possible_table_paths = [
	    "tables",
	    "tests/tables",
	    "/usr/share/liblouis/tables",
	    "/usr/local/share/liblouis/tables",
	]


	class HarnessError(Exception):
	    """Raised by the generator to gracefully process known exceptions.

	    The error text is exposed through the ``message`` attribute on both
	    Python 2 and Python 3, so handlers that read ``e.message`` keep working
	    (Python 3 exceptions do not provide that attribute by themselves).
	    """

	    def __init__(self, *args):
	        Exception.__init__(self, *args)
	        # Same rule Python 2 applied: the sole argument, otherwise empty.
	        self.message = args[0] if len(args) == 1 else ''

	# Add support for command line arguments.
	parser = argparse.ArgumentParser(description='Test harness file generator')
	parser.add_argument('-u', action="store_true", dest="unicodebraille", default=False, help='Use unicode braille for output.')
	parser.add_argument('-d', action='store', default='', dest="directory", help="Output directory for the generated harness file(s).")
	parser.add_argument('infiles', action='store', nargs='+', help='Filenames where the source text is stored, wildcards are supported.')
	args = parser.parse_args()

	# Check if output directory command line option was given
	# and if so make sure path ends with the / character, because the output
	# file name is later built by plain string concatenation.
	if args.directory:
	    if not args.directory.endswith("/"):
	        args.directory += "/"
	    # Make sure output directory exists.
	    if not os.path.exists(args.directory):
	        try:
	            os.makedirs(args.directory)
	        except OSError as e:
	            # strerror can be None for some OSError instances; fall back to str(e).
	            reason = (e.strerror or str(e)).lower()
	            print("Error: unable to create {dir} {msg}.\nHarness generation aborted.".format(dir=args.directory, msg=reason))
	            # Exit with a non-zero status so callers can detect the failure.
	            sys.exit(1)


	def check_tables_exists(headerLine):
	    """Turn the header line of a harness source into a list of table names.

	    The header line must start with "tables: ". Commas are dropped and the
	    remainder is split on whitespace. When unicode braille output was
	    requested on the command line, "unicode.dis" is placed first unless it
	    is already listed.

	    Returns the list of table names.
	    Raises HarnessError when the header is missing, or when a table exists
	    in none of the directories listed in possible_table_paths.
	    """
	    if not headerLine.startswith("tables: "):
	        raise HarnessError("Header line doesn't declare which translation tables should be used.")

	    header = headerLine.strip('\n')
	    header = header.replace("tables: ", "")
	    tableNames = header.replace(",", "").split()

	    if args.unicodebraille and "unicode.dis" not in tableNames:
	        tableNames.insert(0, "unicode.dis")

	    for name in tableNames:
	        # A table only has to exist in one of the search directories.
	        if not any(os.path.exists(directory + '/' + name) for directory in possible_table_paths):
	            raise HarnessError("Unable to locate translation table '%s'." % name)
	    return tableNames

	def hyphenateword(word, tables, mode):
	    """Return word with "-" inserted before each hyphenation point.

	    word is the text to hyphenate, tables the translation table list and
	    mode the liblouis mode value; all three are handed to hyphenate().
	    A '1' in the returned mask marks a position that gets a "-" in front
	    of the corresponding character of word.

	    Raises HarnessError when hyphenate() raises RuntimeError.
	    """
	    try:
	        # FIXME: liblouis currently crashes if we dont add space at end of the word, probably due to a counter running past the end of the string.
	        # medium/longterm this hack should be removed, and the root of the problem found/resolved.
	        hyphen_mask = hyphenate(tables, word + ' ', mode)
	    except RuntimeError as e:
	        # e.message only exists on Python 2; str(e) works on both versions.
	        raise HarnessError('Hyphenation not possible, %s.' % str(e))

	    # The mask may be longer than word because of the padding space above.
	    # zip() stops at the shorter sequence on Python 2 and 3 alike, so no
	    # version-specific trimming is required.
	    return "".join("-" + char if flag == '1' else char for char, flag in zip(word, hyphen_mask))


	def processHarnessSource(sourceFileName):
	    """Generate a harness file from one harness source file.

	    The first line of the source must be the "tables: ..." header. Each
	    later non-empty line is one of:
	      * a "Comment: " / "comment: " line, collected into the comment list
	        of the next testcase,
	      * a flags line ("brlCursorPos: 2 cursorPos: 4 mode: compbrlAtCursor",
	        "testmode: hyphenate", ...) that applies to the next testcase,
	      * the text of a testcase.
	    The resulting harness is written as JSON into args.directory, named
	    after the source file with '_source' removed, and a summary is printed.

	    Raises HarnessError when the source cannot be read, is empty, has a
	    malformed flags line, cannot be translated, yields no testcases, or
	    when the output file cannot be written.
	    """
	    try:
	        with codecs.open(sourceFileName, 'r', 'utf-8') as f:
	            lines = f.readlines()
	    except IOError as e:
	        raise HarnessError("Reading {file}, {msg}.".format(file=e.filename, msg=e.strerror.lower()))
	    if not lines:
	        # Without this guard an empty file would surface as a bare IndexError.
	        raise HarnessError("Source file is empty.")
	    # process headerLine which should contain translation table information.
	    translationTables = check_tables_exists(lines[0])
	    del lines[0]

	    # Initialize harness data structure:
	    harness = {'tables': translationTables, 'tests': []}
	    translate_tests = {'data': []}
	    backtranslate_tests = {'flags': {'testmode': 'backtranslate'}, 'data': []}
	    hyphenate_tests = {'flags': {'testmode': 'hyphenate'}, 'data': []}
	    if args.unicodebraille:
	        backtranslate_tests['flags']['outputUniBrl'] = args.unicodebraille
	        hyphenate_tests['flags']['outputUniBrl'] = args.unicodebraille
	        translate_tests['flags'] = {'outputUniBrl': args.unicodebraille}

	    testcase = {}
	    # Default testmode is to translate, unless specifically overridden.
	    testmode = 'translate'

	    # Process the remaining lines of the harness source. Numbering starts
	    # at 2 because the header was line 1.
	    for lineNumber, rawLine in enumerate(lines, 2):
	        line = rawLine.rstrip('\r\n')
	        if line == '':
	            continue

	        # The first comment line of a testcase creates the comment list; a
	        # long comment split over several lines is appended to it.
	        if line.startswith(('Comment: ', 'comment: ')):
	            testcase.setdefault('comment', []).append(line[len('Comment: '):])
	            continue

	        # Does this line contain any processing flags
	        # example:
	        # brlCursorPos: 2 cursorPos: 4 mode: compbrlAtCursor
	        fields = line.split(': ')
	        if any(flag in fields for flag in possibleFlags):
	            optionlist = line.split(' ')
	            if len(optionlist) % 2:
	                # Flags come in "name: value" pairs; an odd token count
	                # would otherwise end in an IndexError.
	                raise HarnessError("Malformed flags on line {number}.".format(number=lineNumber))
	            for pos in range(0, len(optionlist), 2):
	                name = optionlist[pos].replace(":", "")
	                value = optionlist[pos + 1].replace(",", "")
	                if name in ("brlCursorPos", "cursorPos"):
	                    try:
	                        testcase[name] = int(value)
	                    except ValueError:
	                        raise HarnessError("Invalid value for {flag} on line {number}.".format(flag=name, number=lineNumber))
	                elif name == 'testmode' and value in test_types:
	                    testmode = value
	                elif name == 'mode' and value in modes:
	                    testcase['mode'] = value
	            continue

	        # What liblouis translation mode bits should be set
	        mode = modes[testcase.get('mode', '')]
	        cursorPos = testcase.get('cursorPos', 0)

	        # By now we have finished processing testcase options and we know
	        # what we want to do for this testcase.
	        # So depending on testmode, do the needed work.
	        try:
	            brl = translate(translationTables, line, None, cursorPos, mode)[0]
	        except RuntimeError as e:
	            # e.message only exists on Python 2; str(e) works on both versions.
	            raise HarnessError("translate impossible, a problem occured %s." % str(e))
	        # NOTE(review): the search pattern shows up as an empty string in this
	        # copy of the file, which would insert the replacement between every
	        # character. Presumably it originally held a literal DEL (U+007F)
	        # character; it is spelled as an escape here - confirm against upstream.
	        brl = brl.replace("\x7f", "\u007f")

	        if testmode == 'translate':
	            testcase['input'] = line
	            testcase['output'] = brl
	            translate_tests['data'].append(testcase)
	        elif testmode == 'backtranslate':
	            testcase['input'] = brl
	            testcase['output'] = line
	            backtranslate_tests['data'].append(testcase)
	        elif testmode == 'hyphenate':
	            testcase['input'] = line
	            testcase['output'] = hyphenateword(line, translationTables, mode)
	            hyphenate_tests['data'].append(testcase)

	        # Flags and comments only apply to a single testcase, so start over.
	        testcase = {}
	        # Default testmode is to translate, unless specifically overridden.
	        testmode = 'translate'

	    # Harness source has been processed, prepare to output harness file.
	    generatedTestcases = len(backtranslate_tests['data']) + len(translate_tests['data']) + len(hyphenate_tests['data'])
	    # Only groups that actually received testcases go into the harness.
	    for group in (backtranslate_tests, translate_tests, hyphenate_tests):
	        if group['data']:
	            harness['tests'].append(group)

	    if not generatedTestcases:
	        raise HarnessError('No testcases generated.')
	    outfile = args.directory + os.path.basename(sourceFileName).replace('_source', '')
	    try:
	        with codecs.open(outfile, 'w', 'utf-8') as f:
	            json.dump(harness, f, indent=2, sort_keys=True, ensure_ascii=False)
	    except IOError as e:
	        raise HarnessError("Writeing {file}, {msg}.".format(file=e.filename, msg=e.strerror.lower()))
	    else:
	        print("{file}: {number} testcases, please carefully inspect and apply any manual corrections.".format(file=outfile, number=generatedTestcases))

	# Begin processing input files
	for infile in args.infiles:
	    try:
	        processHarnessSource(infile)
	    except HarnessError as e:
	        # e.message only exists on Python 2; the first constructor argument
	        # carries the same text on every Python version.
	        reason = e.args[0] if e.args else ''
	        print("Error: while processing {source}. {msg}".format(source=infile, msg=reason))