reformat
This commit is contained in:
parent
fba4c216ee
commit
082910859b
1 changed file with 98 additions and 135 deletions
|
@@ -1,49 +1,12 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
# nosetests --nocapture tests.py
|
|
||||||
from nose.tools import eq_
|
from nose.tools import eq_
|
||||||
|
|
||||||
# http://docs.python.org/library/codecs.html
|
|
||||||
# https://bitbucket.org/medoc/uchardet-enhanced/src/85fc77c3e058/libcharsetdetect/README.md
|
|
||||||
|
|
||||||
# Support codecs
|
|
||||||
# Big5
|
|
||||||
# EUC-JP
|
|
||||||
# EUC-KR
|
|
||||||
# GB18030
|
|
||||||
# gb18030
|
|
||||||
# HZ-GB-2312
|
|
||||||
# IBM855
|
|
||||||
# IBM866
|
|
||||||
# ISO-2022-CN
|
|
||||||
# ISO-2022-JP
|
|
||||||
# ISO-2022-KR
|
|
||||||
# ISO-8859-2
|
|
||||||
# ISO-8859-5
|
|
||||||
# ISO-8859-7
|
|
||||||
# ISO-8859-8
|
|
||||||
# KOI8-R
|
|
||||||
# Shift_JIS
|
|
||||||
# TIS-620
|
|
||||||
# UTF-8
|
|
||||||
# UTF-16BE
|
|
||||||
# UTF-16LE
|
|
||||||
# UTF-32BE
|
|
||||||
# UTF-32LE
|
|
||||||
# windows-1250
|
|
||||||
# windows-1251
|
|
||||||
# windows-1252
|
|
||||||
# windows-1253
|
|
||||||
# windows-1255
|
|
||||||
# x-euc-tw
|
|
||||||
# X-ISO-10646-UCS-4-2143
|
|
||||||
# X-ISO-10646-UCS-4-3412
|
|
||||||
# x-mac-cyrillic
|
|
||||||
import cchardet
|
import cchardet
|
||||||
|
|
||||||
class TestCchardet():
|
|
||||||
|
|
||||||
|
class TestCchardet():
|
||||||
def test_detect_bg_iso88595(self):
|
def test_detect_bg_iso88595(self):
|
||||||
encoding = "ISO-8859-5"
|
encoding = "ISO-8859-5"
|
||||||
path = r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt"
|
path = r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt"
|
||||||
|
@@ -465,6 +428,7 @@ class TestCchardet():
|
||||||
detected_encoding = cchardet.detect(msg)
|
detected_encoding = cchardet.detect(msg)
|
||||||
eq_(encoding.lower(), detected_encoding['encoding'].lower())
|
eq_(encoding.lower(), detected_encoding['encoding'].lower())
|
||||||
|
|
||||||
|
|
||||||
class TestCchardetSpeed():
|
class TestCchardetSpeed():
|
||||||
def test_speed(self):
|
def test_speed(self):
|
||||||
try:
|
try:
|
||||||
|
@@ -495,7 +459,6 @@ class TestCchardetSpeed():
|
||||||
|
|
||||||
|
|
||||||
class TestDetector():
|
class TestDetector():
|
||||||
|
|
||||||
encodings_map = {
|
encodings_map = {
|
||||||
r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt": "ISO-8859-5",
|
r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt": "ISO-8859-5",
|
||||||
r"testdata/bg/UTF-8/wikitop_bg_UTF-8.txt": "UTF-8",
|
r"testdata/bg/UTF-8/wikitop_bg_UTF-8.txt": "UTF-8",
|
||||||
|
|
Loading…
Reference in a new issue