#!/usr/bin/env python
# coding: utf-8

import os
import sys
import glob
import codecs
import re
from distutils.command.build_ext import build_ext
from distutils import sysconfig

try:
    from setuptools import setup, Extension
except ImportError:
    from distutils.core import setup, Extension

# Cython is optional: record whether its compiler front end can be imported.
try:
    import Cython.Compiler.Main as cython_compiler
except ImportError:
    have_cython = False
else:
    have_cython = True

# Locations of the Cython wrapper sources and of the bundled uchardet sources.
cchardet_dir = 'src/cchardet/'
uchardet_dir = 'src/ext/uchardet/src'

if have_cython:
    # Translate every .pyx file under cchardet_dir to C++ (cplus=True),
    # treating the sources as Python 3 (language_level 3).
    pyx_sources = glob.glob(cchardet_dir + '*.pyx')
    sys.stderr.write('cythonize: %r\n' % (pyx_sources,))
    compile_options = cython_compiler.CompilationOptions(
        cplus=True,
        compiler_directives={"language_level": 3},
    )
    cython_compiler.compile(pyx_sources, options=compile_options)

# C++ sources found directly under cchardet_dir. glob returns its matches in
# arbitrary order, so sort them to keep the source list reproducible.
cchardet_sources = sorted(glob.glob(cchardet_dir + '*.cpp'))

# uchardet translation units compiled into the extension; each name is
# relative to uchardet_dir.
uchardet_sources = [
    os.path.join(uchardet_dir, name)
    for name in (
        'CharDistribution.cpp',
        'JpCntx.cpp',
        'LangModels/LangArabicModel.cpp',
        'LangModels/LangBulgarianModel.cpp',
        'LangModels/LangCroatianModel.cpp',
        'LangModels/LangCzechModel.cpp',
        'LangModels/LangEsperantoModel.cpp',
        'LangModels/LangEstonianModel.cpp',
        'LangModels/LangFinnishModel.cpp',
        'LangModels/LangFrenchModel.cpp',
        'LangModels/LangDanishModel.cpp',
        'LangModels/LangGermanModel.cpp',
        'LangModels/LangGreekModel.cpp',
        'LangModels/LangHungarianModel.cpp',
        'LangModels/LangHebrewModel.cpp',
        'LangModels/LangIrishModel.cpp',
        'LangModels/LangItalianModel.cpp',
        'LangModels/LangLithuanianModel.cpp',
        'LangModels/LangLatvianModel.cpp',
        'LangModels/LangMalteseModel.cpp',
        'LangModels/LangPolishModel.cpp',
        'LangModels/LangPortugueseModel.cpp',
        'LangModels/LangRomanianModel.cpp',
        'LangModels/LangRussianModel.cpp',
        'LangModels/LangSlovakModel.cpp',
        'LangModels/LangSloveneModel.cpp',
        'LangModels/LangSwedishModel.cpp',
        'LangModels/LangSpanishModel.cpp',
        'LangModels/LangThaiModel.cpp',
        'LangModels/LangTurkishModel.cpp',
        'LangModels/LangVietnameseModel.cpp',
        'nsHebrewProber.cpp',
        'nsCharSetProber.cpp',
        'nsBig5Prober.cpp',
        'nsEUCJPProber.cpp',
        'nsEUCKRProber.cpp',
        'nsEUCTWProber.cpp',
        'nsEscCharsetProber.cpp',
        'nsEscSM.cpp',
        'nsGB2312Prober.cpp',
        'nsMBCSGroupProber.cpp',
        'nsMBCSSM.cpp',
        'nsSBCSGroupProber.cpp',
        'nsSBCharSetProber.cpp',
        'nsSJISProber.cpp',
        'nsUTF8Prober.cpp',
        'nsLatin1Prober.cpp',
        'nsUniversalDetector.cpp',
        'uchardet.cpp',
    )
]

# Full source list for the extension. Build a new list instead of aliasing
# cchardet_sources, so that cchardet_sources is not extended as a side effect.
sources = cchardet_sources + uchardet_sources

# Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++.
# The dict returned by get_config_vars() is edited in place; only existing keys
# are reassigned, so mutating it while iterating is safe.
cfg_vars = sysconfig.get_config_vars()
for key, value in cfg_vars.items():
    if isinstance(value, str):
        cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
        # NOTE: "-O2" is deliberately not upgraded to "-O3"; it is doubtful
        # that -O3 would actually improve speed.

# Extension 'cchardet._cchardet': compiled as C++ from `sources`, with
# uchardet_dir added to the include path.
cchardet_module = Extension(
    name='cchardet._cchardet',
    language='c++',
    include_dirs=[uchardet_dir],
    sources=sources,
)


def read(f):
    """Return the contents of file *f* with surrounding whitespace stripped.

    *f* is resolved relative to the directory containing this script (an
    absolute path is used as-is). The file is decoded as UTF-8 rather than
    with the locale's default encoding, and the handle is closed as soon as
    the text has been read.
    """
    path = os.path.join(os.path.dirname(__file__), f)
    with open(path, encoding='utf-8') as fp:
        return fp.read().strip()


# Pull the version string out of src/cchardet/version.py by scanning its text
# for a line of the form: __version__ = '<value>'
version_file = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'src', 'cchardet', 'version.py')
with codecs.open(version_file, 'r', 'latin1') as fp:
    version_text = fp.read()

try:
    version = re.findall(
        r"^__version__ = '([^']+)'\r?$", version_text, re.M)[0]
except IndexError:
    # No matching line was found in the file.
    raise RuntimeError('Unable to determine version.')

# Package metadata and build configuration, collected in one mapping and then
# passed to setup() as keyword arguments.
setup_options = {
    'name': 'cchardet',
    'author': 'PyYoshi',
    'author_email': 'myoshi321go@gmail.com',
    'url': r'https://github.com/PyYoshi/cChardet',
    'description': 'cChardet is high speed universal character encoding detector.',
    # Long description is README.rst followed by CHANGES.rst.
    'long_description': '\n\n'.join((read('README.rst'), read('CHANGES.rst'))),
    'version': version,
    'license': 'Mozilla Public License',
    'classifiers': [
        'License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)',
        'License :: OSI Approved :: GNU General Public License (GPL)',
        'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)',
        'Programming Language :: Cython',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
    ],
    'keywords': [
        'cython',
        'chardet',
        'charsetdetect'
    ],
    'cmdclass': {'build_ext': build_ext},
    # Python packages live under src/.
    'package_dir': {'': 'src'},
    'packages': ['cchardet', ],
    'scripts': ['bin/cchardetect'],
    'ext_modules': [
        cchardet_module
    ],
}

setup(**setup_options)