From 31cf5be8d15e23a83f8590acabcb5dc9bf3a979a Mon Sep 17 00:00:00 2001 From: Yoshishiro Misawa Date: Tue, 3 Dec 2013 10:31:45 +0900 Subject: [PATCH] test: add maccyrillic --- .../X-MAC-CYRILLIC/wikitop_ru_MACCYRILLIC.txt | 157 ++++++++++++++++++ test/tests.py | 11 +- 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 test/testdata/ru/X-MAC-CYRILLIC/wikitop_ru_MACCYRILLIC.txt diff --git a/test/testdata/ru/X-MAC-CYRILLIC/wikitop_ru_MACCYRILLIC.txt b/test/testdata/ru/X-MAC-CYRILLIC/wikitop_ru_MACCYRILLIC.txt new file mode 100644 index 0000000..03738d0 --- /dev/null +++ b/test/testdata/ru/X-MAC-CYRILLIC/wikitop_ru_MACCYRILLIC.txt @@ -0,0 +1,157 @@ + # (ru) copyright Atom- + + + + + 19 2011. + . + / + 19 2011. + . + : , + + + , + + , . + 779 821 . + + + + + + + + + + + + + + + + ( ) + + + + + II + + ǒ + 3 (16) 1907 II + , + . + + II + , + - . . , , + (-, + -, ) + . II , 20 1907 , + , I . + , + , , + . + + ( ) + , + ; + , , + . + + () + (471) | | + + + + 19921996 Ս + + ǎ (. Orlando Magic) + , - + . 1989 + , nj , + . , , + ǝ-, 2010 . + , Ս, , + , , , , + . 22 13 -, 5 + 2 ( 2004 + 2009 ). 2011 , + . + () + (1194) | | + + + + + : + ESEFUL.jpg + , + (19882001) + : + Australia stub.svg + + | | + + + + (Pan troglodytes) . + , . + (Pan troglodytes) . + , . + + + ? + + + + * + ( ) + . + * Halomonas titanicae ǒ. + * + . + * , , + . + + ǃ + * ( ) 5 + . + * Lj + . + * - + . + * . + * , ? + + . + * , + . + + | | | + + 21 + + + * 1097 + * 1520 + + * 1578 ( .) + * 1727 + + * 1805 + * 1858 ǎ , + + * 1895 , + + * 1967 + + ǂ 21 2011 + + + + + + + ǝ-. + \ No newline at end of file diff --git a/test/tests.py b/test/tests.py index 82c916d..14445a3 100644 --- a/test/tests.py +++ b/test/tests.py @@ -376,6 +376,15 @@ class TestCchardet(): print(detected_encoding) eq_(encoding.lower(),detected_encoding['encoding'].lower()) + def test_detect_ru_maccyrillic(self): + encoding = "MAC-CYRILLIC" + path = r"testdata/ru/X-MAC-CYRILLIC/wikitop_ru_MACCYRILLIC.txt" + with open(path, 'rb') as f: + msg = f.read() + detected_encoding = cchardet.detect(msg) + print(detected_encoding) + eq_(encoding.lower(),detected_encoding['encoding'].lower()) + def test_detect_se_utf8(self): encoding = "UTF-8" path = r"testdata/se/UTF-8/wikitop_se_UTF-8.txt" @@ -482,4 +491,4 @@ class TestCchardetSpeed(): start_cchardet = time.time() cchardet.detect(msg) result_cchardet += (time.time() - start_cchardet) - print('cchardet:',1/(result_cchardet/do_times), 'call(s)/s') \ No newline at end of file + print('cchardet:',1/(result_cchardet/do_times), 'call(s)/s')