| """Setup utility for gcld3.""" |
| |
| import os |
| import platform |
| import shutil |
| import subprocess |
| import setuptools |
| from setuptools.command import build_ext |
| |
# Package identity.
__version__ = '3.0.13'
_NAME = 'gcld3'

# Packages that must be available before the extension can be built; this
# list is passed to `setuptools.setup` as `setup_requires`.
REQUIREMENTS = [
    'pybind11 >= 2.5.0',
    'wheel >= 0.34.2',
]

# Protocol buffer definitions handed to `protoc` by the build step.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]

# Hand-written C++ sources that live directly under `src/`.
_CORE_SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
]

# C++ sources under `src/script_span/`.
_SCRIPT_SPAN_SRCS = [
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
]

# These CC files are not checked in as-is: they have to be generated by the
# protocol buffer compiler `protoc` before the extension is compiled.
_GENERATED_PROTO_SRCS = [
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
]

# pybind11 bindings.
_PYBIND_SRCS = [
    'gcld3/pybind_ext.cc',
]

# Every source file compiled into the extension module, in the order:
# core, script_span, generated protos, bindings.
SRCS = (
    _CORE_SRCS
    + _SCRIPT_SPAN_SRCS
    + _GENERATED_PROTO_SRCS
    + _PYBIND_SRCS
)
| |
| |
class CompileProtos(build_ext.build_ext):
    """`build_ext` command that first runs `protoc` on the proto files."""

    def run(self):
        """Generate the C++ proto sources, then run the regular build.

        Raises:
            RuntimeError: If no `protoc` executable can be located.
            subprocess.CalledProcessError: If `protoc` exits with a non-zero
                status (the subprocess is run with `check=True`).
        """
        protoc_path = shutil.which('protoc')
        if protoc_path is None:
            raise RuntimeError('Please install the proto buffer compiler.')

        # The C++ code expects the compiled protos to live under this
        # directory, so make sure it exists before invoking the compiler.
        compiled_protos_dir = 'src/cld_3/protos/'
        os.makedirs(compiled_protos_dir, exist_ok=True)

        command = [
            'protoc',
            f'--cpp_out={compiled_protos_dir}',
            '--proto_path=src',
            *PROTO_FILES,
        ]
        subprocess.run(command, check=True, cwd='./')

        # Hand over to the stock build_ext implementation.
        super().run()
| |
| |
class PyBindIncludes:
    """Lazy placeholder for the pybind11 include path.

    Creating an instance does not import pybind11. The import, and the call
    to `pybind11.get_include()`, happen only when the object is converted to
    a string. This delay lets pybind11 be installed first and imported
    later, once it is available in the environment, to determine the
    correct include path.
    """

    def __str__(self):
        """Return the include directory reported by pybind11."""
        from pybind11 import get_include  # pylint: disable=g-import-not-at-top
        return get_include()
| |
| |
# The C++11 / libc++ flags below are only passed when building on macOS
# (i.e. when `platform.system()` reports 'Darwin'); elsewhere no extra
# compiler or linker flags are added.
MACOS = platform.system() == 'Darwin'
if MACOS:
    _extra_compile_args = ['-std=c++11', '-stdlib=libc++']
    _extra_link_args = ['-stdlib=libc++']
else:
    _extra_compile_args = []
    _extra_link_args = []

# The single C++ extension module, linked against the protobuf library.
ext_modules = [
    setuptools.Extension(
        name='gcld3.pybind_ext',
        sources=sorted(SRCS),
        # Resolved lazily: the object is only turned into a path string when
        # it is converted with str().
        include_dirs=[PyBindIncludes()],
        libraries=['protobuf'],
        extra_compile_args=_extra_compile_args,
        extra_link_args=_extra_link_args,
        language='c++',
    ),
]

# Long description shown for the package.
DESCRIPTION = """CLD3 is a neural network model for language identification.
This package contains the inference code and a trained model. See
https://github.com/google/cld3 for more details.
"""

setuptools.setup(
    # Package metadata.
    name=_NAME,
    version=__version__,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    # Contents and build configuration.
    packages=setuptools.find_packages(),
    ext_modules=ext_modules,
    # Route `build_ext` through CompileProtos.
    cmdclass={'build_ext': CompileProtos},
    setup_requires=REQUIREMENTS,
    zip_safe=False,
)