add test
This commit is contained in:
parent
d385fdaa9f
commit
f42e91a616
5 changed files with 34 additions and 4 deletions
|
@ -1,6 +1,12 @@
|
|||
CHANGES
|
||||
=======
|
||||
|
||||
2.1.x
|
||||
-----
|
||||
|
||||
- update language models (uchardet)
|
||||
- add iso8859-2 test (currently disabled)
|
||||
|
||||
2.1.4 (2018-09-27)
|
||||
------------------
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ def detect(msg):
|
|||
encoding = encoding.decode()
|
||||
return {"encoding": encoding, "confidence": confidence}
|
||||
|
||||
|
||||
class UniversalDetector(object):
|
||||
def __init__(self):
|
||||
self._detector = _cchardet.UniversalDetector()
|
||||
|
|
|
@ -16,7 +16,8 @@ def main():
|
|||
start_chardet = time.time()
|
||||
chardet.detect(msg)
|
||||
result_chardet += (time.time() - start_chardet)
|
||||
print('chardet v%s:' % (chardet.__version__), 1 / (result_chardet / do_times), 'call(s)/s')
|
||||
print('chardet v%s:' % (chardet.__version__), 1 /
|
||||
(result_chardet / do_times), 'call(s)/s')
|
||||
|
||||
# Test cchardet
|
||||
result_cchardet = 0
|
||||
|
@ -24,7 +25,9 @@ def main():
|
|||
start_cchardet = time.time()
|
||||
cchardet.detect(msg)
|
||||
result_cchardet += (time.time() - start_cchardet)
|
||||
print('cchardet v%s:' % (cchardet.__version__), 1 / (result_cchardet / do_times), 'call(s)/s')
|
||||
print('cchardet v%s:' % (cchardet.__version__),
|
||||
1 / (result_cchardet / do_times), 'call(s)/s')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
3
src/tests/samples/iso8859-2.csv
Normal file
3
src/tests/samples/iso8859-2.csv
Normal file
|
@ -0,0 +1,3 @@
|
|||
id,name
|
||||
1,english
|
||||
2,©
|
|
|
@ -14,6 +14,7 @@ SKIP_LIST = [
|
|||
'tests/testdata/he/iso-8859-8.txt'
|
||||
]
|
||||
|
||||
|
||||
# Encodings that Python cannot decode (no matching codec)
|
||||
SKIP_LIST_02 = [
|
||||
'tests/testdata/vi/viscii.txt',
|
||||
|
@ -21,6 +22,7 @@ SKIP_LIST_02 = [
|
|||
]
|
||||
SKIP_LIST_02.extend(SKIP_LIST)
|
||||
|
||||
|
||||
class TestCChardet():
|
||||
def test_ascii(self):
|
||||
detected_encoding = cchardet.detect(b'abcdefghijklmnopqrstuvwxyz')
|
||||
|
@ -100,7 +102,8 @@ class TestCChardet():
|
|||
try:
|
||||
msg.decode(detected_encoding["encoding"])
|
||||
except LookupError as e:
|
||||
print("LookupError: { file=%s, encoding=%s }" % (testfile, detected_encoding["encoding"]))
|
||||
print("LookupError: { file=%s, encoding=%s }" % (
|
||||
testfile, detected_encoding["encoding"]))
|
||||
raise e
|
||||
|
||||
def test_utf8_with_bom(self):
|
||||
|
@ -126,3 +129,17 @@ class TestCChardet():
|
|||
detected_encoding['encoding']
|
||||
)
|
||||
)
|
||||
|
||||
# def test_iso8859_2_csv(self):
|
||||
# testfile = 'tests/samples/iso8859-2.csv'
|
||||
# with open(testfile, 'rb') as f:
|
||||
# msg = f.read()
|
||||
# detected_encoding = cchardet.detect(msg)
|
||||
# eq_(
|
||||
# "iso8859-2",
|
||||
# detected_encoding['encoding'].lower(),
|
||||
# 'Expected %s, but got %s' % (
|
||||
# "iso8859-2",
|
||||
# detected_encoding['encoding'].lower()
|
||||
# )
|
||||
# )
|
||||
|
|
Loading…
Reference in a new issue