From 93aed088f21b0b5039cdadbf2568881a19b9a7e1 Mon Sep 17 00:00:00 2001 From: PyYoshi Date: Tue, 26 Jun 2012 10:02:49 +0900 Subject: [PATCH] refresh --- .gitignore | 3 +- ext/libcharsetdetect/.gitignore | 4 -- setup2.py | 59 ----------------- src/cchardet/cchardet2.pyx | 109 -------------------------------- src/cchardet/nscore.pxd | 9 --- src/cchardet/prtypes.pxd | 9 --- src/cchardet/python.pxd | 5 -- src/cchardet/string.pxd | 4 -- 8 files changed, 2 insertions(+), 200 deletions(-) delete mode 100644 setup2.py delete mode 100644 src/cchardet/cchardet2.pyx delete mode 100644 src/cchardet/nscore.pxd delete mode 100644 src/cchardet/prtypes.pxd delete mode 100644 src/cchardet/python.pxd delete mode 100644 src/cchardet/string.pxd diff --git a/.gitignore b/.gitignore index 83986f0..465239d 100644 --- a/.gitignore +++ b/.gitignore @@ -34,7 +34,8 @@ nosetests.xml ext__ ext/libcharsetdetect/mozilla_ src/cchardet/*.c +src/cchardet/*.o src/cchardet/*.cpp libcharsetdetect.dll charsetdetect.h -MANIFEST \ No newline at end of file +MANIFEST diff --git a/ext/libcharsetdetect/.gitignore b/ext/libcharsetdetect/.gitignore index 0b25b4d..7cbdf74 100644 --- a/ext/libcharsetdetect/.gitignore +++ b/ext/libcharsetdetect/.gitignore @@ -24,7 +24,3 @@ CMakeCache.txt CMakeFiles/ cmake_install.cmake install_manifest.txt - -# prebuild junk -configure.bat -exec_make.bat \ No newline at end of file diff --git a/setup2.py b/setup2.py deleted file mode 100644 index 75ab9b0..0000000 --- a/setup2.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import ez_setup -ez_setup.use_setuptools() -import os,sys,platform,shutil -import subprocess -from setuptools import setup, Extension -import distutils.spawn as ds -from Cython.Distutils import build_ext - -root = os.getcwd() -ext_dir = os.path.join(root,'ext_') -src_dir = os.path.join(root,'src') -build_dir = os.path.join(root,'build') -cchardet_dir = os.path.join(src_dir,'cchardet') -cchardet_source = os.path.join(cchardet_dir,"cchardet2.pyx") -charsetdetect_dir = os.path.join(ext_dir, 'libcharsetdetect') -charsetdetect_build_dir = os.path.join(charsetdetect_dir,'build') - - -cchardet_module = Extension("_cchardet", - sources = [cchardet_source], - #libraries = ['charsetdetect'], - #include_dirs = [charsetdetect_dir], - #library_dirs = [charsetdetect_build_dir], - language = "c", -) - -setup( - name = 'cchardet', - author= 'PyYoshi', - url = r"https://github.com/PyYoshi/cChardet", - description = 'Universal encoding detector', - long_description= """This library is high speed universal character encoding detector. - binding to charsetdetect. -This library is faster than chardet. -""", - version = '0.1', - classifiers = [ # http://pypi.python.org/pypi?:action=list_classifiers - 'Development Status :: 1 - Planning', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Cython', - 'Programming Language :: Python', - 'Topic :: Software Development :: Libraries', - ], - keywords = [ - 'cython', - 'chardet', - 'universal character encoding detector', - 'charsetdetect' - ], - ext_package='cchardet', - package_dir = {'':src_dir}, - packages = ['cchardet'], - cmdclass = {'build_ext': build_ext}, - ext_modules = [ - cchardet_module - ], -) \ No newline at end of file diff --git a/src/cchardet/cchardet2.pyx b/src/cchardet/cchardet2.pyx deleted file mode 100644 index 13b35fc..0000000 --- a/src/cchardet/cchardet2.pyx +++ /dev/null @@ -1,109 +0,0 @@ -# coding:utf8 - -from libc.stdlib cimport malloc, free - -cimport prtypes, src.cchardet.nscore - -cdef extern from *: - ctypedef char* const_char_ptr "const char*" - -cdef extern from "nsUniversalDetector.h": - cdef cppclass nsCharSetProber - cdef enum: - NUM_OF_CHARSET_PROBERS = 3 - cdef enum nsInputState: - ePureAscii = 0 - eEscAscii = 1 - eHighbyte = 2 - - cdef unsigned int NS_FILTER_CHINESE_SIMPLIFIED = 0x01 - cdef unsigned int NS_FILTER_CHINESE_TRADITIONAL = 0x02 - cdef unsigned int NS_FILTER_JAPANESE = 0x04 - cdef unsigned int NS_FILTER_KOREAN = 0x08 - cdef unsigned int NS_FILTER_NON_CJK = 0x10 - cdef unsigned int NS_FILTER_ALL = 0x1F - cdef unsigned int NS_FILTER_CHINESE = NS_FILTER_CHINESE_SIMPLIFIED | NS_FILTER_CHINESE_TRADITIONAL | NS_FILTER_JAPANESE | NS_FILTER_KOREAN - - cdef class nsUniversalDetector: - cdef nsUniversalDetector(self, PRUint32 aLanguageFilter) - cdef nsresult HandleData(self, const_char_ptr aBuf, PRUint32 aLen) - cdef void DataEnd(self,) - - cdef void _Report(self,const_char_ptr aCharset) - cdef void _Reset(self) - cdef nsInputState _mInputState - cdef PRBool _mDone - cdef PRBool _mInTag - cdef PRBool _mStart - cdef PRBool _mGotData - cdef char _mLastChar - cdef const_char_ptr _mDetectedCharset - cdef PRUInt32 _mBestGuess - cdef PRUint32 _mLanguageFilter - - cdef nsCharSetProber *_mCharsetProber[NUM_OF_CHARSET_PROBERS] - cdef nsCharSetProber *_mEscCharSetProber - -""" -cdef extern from *: - cdef class Detector(nsUniversalDetector): - cdef Detector(self, PRUint32 aLanguageFilter): - nsUniversalDetector(self, aLanguageFilter) - cdef int Consider(self, const_char_ptr data, int length) - cdef const_char_ptr Close(self, ) - - cdef void _Report(self, const_char_ptr aCharset) - cdef const_char_ptr *_mDetectedCharset""" - -cdef class Detector(nsUniversalDetector): - cdef Detector(self, PRUint32 aLanguageFilter): - nsUniversalDetector(self, aLanguageFilter) - - cdef void Report(self, const_char_ptr aCharset): - self._mDone = PR_TRUE - self._mDetectedCharset = aCharset - - cdef int Consider(self, const_char_ptr data, int length): - if HandleData(data,length) == NS_ERROR_OUT_OF_MEMORY: - # Error, signal with a negative number - return -1 - - if self._mDone: - # Detected early - return 0 - - # Need more data! - return 1 - - cdef const_char_ptr Close(self): - self.DataEnd() - - if not self._mDone: - if self._mInputState == eEscAscii: - return "ibm850" - elif self._mInputState == ePureAscii: - return "ASCII" - - return None - - return self._mDetectedCharset - -cdef extern from *: - ctypedef void* csd_t - cdef csd_t csd_open() - cdef int csd_consider(csd_t csd, char* data, int length) - cdef const_char_ptr csd_close(csd_t csd) - -cdef csd_t csd_open(): - # TODO: capture exceptions thrown by "new" and return -1 in that case - # TODO: provide C-land with access to the language filter constructor argument - return Detector(NS_FILTER_ALL) - -cdef int csd_consider(csd_t csd, const_char_ptr data, int length): - # return ((Detector*)csd)->Consider(data, length); - return csd.Consider(data, length) - -cdef const_char_ptr csd_close(csd_t csd): - cdef const_char_ptr result = csd.Close() - del csd - return result \ No newline at end of file diff --git a/src/cchardet/nscore.pxd b/src/cchardet/nscore.pxd deleted file mode 100644 index 00b48de..0000000 --- a/src/cchardet/nscore.pxd +++ /dev/null @@ -1,9 +0,0 @@ -# coding:utf8 - -cdef extern from "nscore.h": - # base: https://github.com/kmshi/miro/blob/5d7cdd679830169590a677632cd88a2fa27f81f5/tv/windows/plat/frontends/widgets/XULRunnerBrowser/xulrunnerbrowser.pyx - ctypedef PRUint32 nsresult - ctypedef PRUint32 PRBool - cdef enum: - NS_OK = 0 - cdef PRUint32 NS_ERROR_OUT_OF_MEMORY = 0x8007000eL \ No newline at end of file diff --git a/src/cchardet/prtypes.pxd b/src/cchardet/prtypes.pxd deleted file mode 100644 index 74d8dee..0000000 --- a/src/cchardet/prtypes.pxd +++ /dev/null @@ -1,9 +0,0 @@ -# coding:utf8 - -cdef extern from "prtypes.h": - ctypedef unsigned int PRUint32 - ctypedef int PRIntn - ctypedef PRIntn PRBool - cdef enum: - PR_TRUE = 1 - PR_FALSE = 0 \ No newline at end of file diff --git a/src/cchardet/python.pxd b/src/cchardet/python.pxd deleted file mode 100644 index 7e2b21d..0000000 --- a/src/cchardet/python.pxd +++ /dev/null @@ -1,5 +0,0 @@ -# coding:utf8 - -cdef extern from "Python.h": - void * PyMem_Malloc(size_t) - void PyMem_Free(void *) \ No newline at end of file diff --git a/src/cchardet/string.pxd b/src/cchardet/string.pxd deleted file mode 100644 index 72594f8..0000000 --- a/src/cchardet/string.pxd +++ /dev/null @@ -1,4 +0,0 @@ -# coding:utf8 - -cdef extern from "string.h": - cdef int strlen(char *s) \ No newline at end of file