update
This commit is contained in:
parent
83a7cf4f6a
commit
d841345327
2 changed files with 45 additions and 27 deletions
66
readme.md
66
readme.md
|
@ -29,29 +29,37 @@ This library is faster than [chardet](http://pypi.python.org/pypi/chardet).
|
||||||
* UTF-16LE
|
* UTF-16LE
|
||||||
* UTF-32BE
|
* UTF-32BE
|
||||||
* UTF-32LE
|
* UTF-32LE
|
||||||
* windows-1250
|
* WINDOWS-1250
|
||||||
* windows-1251
|
* WINDOWS-1251
|
||||||
* windows-1252
|
* WINDOWS-1252
|
||||||
* windows-1253
|
* WINDOWS-1253
|
||||||
* windows-1255
|
* WINDOWS-1255
|
||||||
* x-euc-tw
|
* EUC-TW
|
||||||
* X-ISO-10646-UCS-4-2143
|
* X-ISO-10646-UCS-4-2143
|
||||||
* X-ISO-10646-UCS-4-3412
|
* X-ISO-10646-UCS-4-3412
|
||||||
* x-mac-cyrillic
|
* x-mac-cyrillic
|
||||||
|
|
||||||
# Requires
|
# Requires
|
||||||
* Cython: [http://www.cython.org/](http://www.cython.org/)
|
* Cython: [http://www.cython.org/](http://www.cython.org/)
|
||||||
|
|
||||||
|
e.g.) Ubuntu 12.04
|
||||||
|
|
||||||
|
$sudo apt-get install build-essential python-dev cython
|
||||||
|
|
||||||
# Install
|
# Installation
|
||||||
1. $cd /tmp
|
$cd /tmp
|
||||||
|
|
||||||
2. $git clone git://github.com/PyYoshi/cChardet.git
|
$git clone git://github.com/PyYoshi/cChardet.git
|
||||||
|
|
||||||
3. $cd cChardet
|
$cd cChardet
|
||||||
|
|
||||||
4. $python setup.py build
|
$python setup.py build
|
||||||
|
|
||||||
5. $sudo python setup.py install
|
$sudo python setup.py install
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
$sudo easy_install cchardet
|
||||||
|
|
||||||
# Example
|
# Example
|
||||||
|
|
||||||
|
@ -66,32 +74,42 @@ print(result2)
|
||||||
```
|
```
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
* $sudo easy_install or pip install -U chardet nose
|
$sudo easy_install chardet nose (or: $sudo pip install -U chardet nose)
|
||||||
|
|
||||||
|
$cd test
|
||||||
|
|
||||||
* $nosetests --nocapture tests.py
|
$nosetests --nocapture tests.py
|
||||||
|
|
||||||
# Benchmark
|
# Benchmark
|
||||||
see [tests.TestCchardetSpeed](https://github.com/PyYoshi/cChardet/blob/master/test/tests.py#L415)
|
code: [tests.TestCchardetSpeed](https://github.com/PyYoshi/cChardet/blob/master/test/tests.py#L415)
|
||||||
|
|
||||||
### Sample(shift_jis):
|
sample: [test/testdata/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt](https://github.com/PyYoshi/cChardet/blob/master/test/testdata/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt)
|
||||||
* [test/testdata/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt](https://github.com/PyYoshi/cChardet/blob/master/test/testdata/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt)
|
|
||||||
|
|
||||||
### PC Spec.:
|
### Performance:
|
||||||
* CPU: Intel Core i7 860 2.8GHz
|
CPU: Intel Core i7 860 2.8GHz
|
||||||
|
|
||||||
* RAM: DDR3-1333 16GB
|
RAM: DDR3-1333 16GB
|
||||||
|
|
||||||
* Platform: Windows 7 HP x64, Python 2.7.3 32-bit
|
Platform: Windows 7 HP x64, Python 2.7.3 32-bit
|
||||||
|
|
||||||
### Result:
|
### Result:
|
||||||
* chardet: 4.009999990463257s, shift_jis
|
|
||||||
|
|
||||||
* cchardet: 0.0009999275207519531s, shift_jis
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th></th><th>Request (call/s)</th><th>Result of encoding</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chardet</td><td>0.25</td><td>shift_jis</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>cchardet</td><td>500.03</td><td>shift_jis</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
# License
|
# License
|
||||||
* The library files ("cchardet.pyx", "setup.py", "tests.py") are released under the MIT License.
|
* The library files ("cchardet.pyx", "setup.py", "tests.py") are released under the MIT License.
|
||||||
|
|
||||||
* Other Library License: Please, look at the "ext" directory.
|
* Other libraries' licenses: please see the [ext](https://github.com/PyYoshi/cChardet/tree/master/src/ext) directory.
|
||||||
|
|
||||||
# Thanks
|
# Thanks
|
||||||
* [https://bitbucket.org/medoc/uchardet-enhanced/overview](https://bitbucket.org/medoc/uchardet-enhanced/overview)
|
* [https://bitbucket.org/medoc/uchardet-enhanced/overview](https://bitbucket.org/medoc/uchardet-enhanced/overview)
|
||||||
|
|
|
@ -39,9 +39,7 @@ def detect_with_confidence(char *msg):
|
||||||
cdef float confidence = 0.0
|
cdef float confidence = 0.0
|
||||||
cdef const_char_ptr detected_charset
|
cdef const_char_ptr detected_charset
|
||||||
# ref: charsetdetect.cpp
|
# ref: charsetdetect.cpp
|
||||||
if result == -1: # Error, signal with a negative number
|
if result == 1: # Need more data
|
||||||
raise Exception("Error, signal with a negative number")
|
|
||||||
elif result == 1: # Need more data
|
|
||||||
detected_charset = csd_close2(csd, &confidence)
|
detected_charset = csd_close2(csd, &confidence)
|
||||||
ret = {
|
ret = {
|
||||||
"encoding":detected_charset,
|
"encoding":detected_charset,
|
||||||
|
@ -55,4 +53,6 @@ def detect_with_confidence(char *msg):
|
||||||
"confidence":confidence
|
"confidence":confidence
|
||||||
}
|
}
|
||||||
return ret
|
return ret
|
||||||
|
else: # Error, signal with a negative number
|
||||||
|
raise Exception("Error, signal with a negative number")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue