Fix an issue where UTF-8 with a BOM would not be detected as UTF-8-SIG (fix #28)

This commit is contained in:
PyYoshi 2017-04-14 10:33:26 +09:00
parent 1f1e8c1ab1
commit f729e11536
2 changed files with 13 additions and 1 deletion

@ -1 +1 @@
Subproject commit 795e982965404568e3a8354e69b08e71137ee6ff Subproject commit a2d24a50fa23d71f543501168bfe9b3a859c83cd

View file

@ -102,3 +102,15 @@ class TestCChardet():
except LookupError as e: except LookupError as e:
print("LookupError: { file=%s, encoding=%s }" % (testfile, detected_encoding["encoding"])) print("LookupError: { file=%s, encoding=%s }" % (testfile, detected_encoding["encoding"]))
raise e raise e
def test_utf8_with_bom(self):
    """Check that input made up of the UTF-8 byte order mark is detected as utf-8-sig.

    The sample is exactly the three bytes EF BB BF; the detected encoding
    name is compared case-insensitively against "utf-8-sig".
    """
    expected = "utf-8-sig"
    sample = b'\xEF\xBB\xBF'
    detected_encoding = cchardet.detect(sample)
    encoding = detected_encoding['encoding']
    # NOTE(review): the reported encoding is presumably None when detection
    # fails -- confirm against cchardet. Guarding here makes that case fail
    # the comparison below with a readable message instead of raising
    # AttributeError on .lower().
    actual = encoding.lower() if encoding is not None else None
    eq_(
        expected,
        actual,
        'Expected %s, but got %s' % (expected, actual)
    )