This commit is contained in:
PyYoshi 2016-10-17 11:05:27 +09:00
parent fba4c216ee
commit 082910859b

View file

@ -1,49 +1,12 @@
#!/usr/bin/env python
# coding: utf-8
# Run with: nosetests --nocapture tests.py
from nose.tools import eq_
# http://docs.python.org/library/codecs.html
# https://bitbucket.org/medoc/uchardet-enhanced/src/85fc77c3e058/libcharsetdetect/README.md
# Supported codecs:
# Big5
# EUC-JP
# EUC-KR
# GB18030
# gb18030
# HZ-GB-2312
# IBM855
# IBM866
# ISO-2022-CN
# ISO-2022-JP
# ISO-2022-KR
# ISO-8859-2
# ISO-8859-5
# ISO-8859-7
# ISO-8859-8
# KOI8-R
# Shift_JIS
# TIS-620
# UTF-8
# UTF-16BE
# UTF-16LE
# UTF-32BE
# UTF-32LE
# windows-1250
# windows-1251
# windows-1252
# windows-1253
# windows-1255
# x-euc-tw
# X-ISO-10646-UCS-4-2143
# X-ISO-10646-UCS-4-3412
# x-mac-cyrillic
import cchardet
class TestCchardet():
class TestCchardet():
def test_detect_bg_iso88595(self):
encoding = "ISO-8859-5"
path = r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt"
@ -465,6 +428,7 @@ class TestCchardet():
detected_encoding = cchardet.detect(msg)
eq_(encoding.lower(), detected_encoding['encoding'].lower())
class TestCchardetSpeed():
def test_speed(self):
try:
@ -495,7 +459,6 @@ class TestCchardetSpeed():
class TestDetector():
encodings_map = {
r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt": "ISO-8859-5",
r"testdata/bg/UTF-8/wikitop_bg_UTF-8.txt": "UTF-8",