reformat
This commit is contained in:
parent
fba4c216ee
commit
082910859b
1 changed file with 98 additions and 135 deletions
|
@@ -1,49 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# nosetests --nocapture tests.py
|
||||
from nose.tools import eq_
|
||||
|
||||
# http://docs.python.org/library/codecs.html
|
||||
# https://bitbucket.org/medoc/uchardet-enhanced/src/85fc77c3e058/libcharsetdetect/README.md
|
||||
|
||||
# Support codecs
|
||||
# Big5
|
||||
# EUC-JP
|
||||
# EUC-KR
|
||||
# GB18030
|
||||
# gb18030
|
||||
# HZ-GB-2312
|
||||
# IBM855
|
||||
# IBM866
|
||||
# ISO-2022-CN
|
||||
# ISO-2022-JP
|
||||
# ISO-2022-KR
|
||||
# ISO-8859-2
|
||||
# ISO-8859-5
|
||||
# ISO-8859-7
|
||||
# ISO-8859-8
|
||||
# KOI8-R
|
||||
# Shift_JIS
|
||||
# TIS-620
|
||||
# UTF-8
|
||||
# UTF-16BE
|
||||
# UTF-16LE
|
||||
# UTF-32BE
|
||||
# UTF-32LE
|
||||
# windows-1250
|
||||
# windows-1251
|
||||
# windows-1252
|
||||
# windows-1253
|
||||
# windows-1255
|
||||
# x-euc-tw
|
||||
# X-ISO-10646-UCS-4-2143
|
||||
# X-ISO-10646-UCS-4-3412
|
||||
# x-mac-cyrillic
|
||||
import cchardet
|
||||
|
||||
class TestCchardet():
|
||||
|
||||
class TestCchardet():
|
||||
def test_detect_bg_iso88595(self):
|
||||
encoding = "ISO-8859-5"
|
||||
path = r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt"
|
||||
|
@@ -465,6 +428,7 @@ class TestCchardet():
|
|||
detected_encoding = cchardet.detect(msg)
|
||||
eq_(encoding.lower(), detected_encoding['encoding'].lower())
|
||||
|
||||
|
||||
class TestCchardetSpeed():
|
||||
def test_speed(self):
|
||||
try:
|
||||
|
@@ -495,7 +459,6 @@ class TestCchardetSpeed():
|
|||
|
||||
|
||||
class TestDetector():
|
||||
|
||||
encodings_map = {
|
||||
r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt": "ISO-8859-5",
|
||||
r"testdata/bg/UTF-8/wikitop_bg_UTF-8.txt": "UTF-8",
|
||||
|
|
Loading…
Reference in a new issue