Fix an issue where UTF-8 with a BOM would not be detected as UTF-8-SIG (fixes #28)
This commit is contained in:
parent
1f1e8c1ab1
commit
f729e11536
2 changed files with 13 additions and 1 deletions
|
@ -1 +1 @@
|
|||
Subproject commit 795e982965404568e3a8354e69b08e71137ee6ff
|
||||
Subproject commit a2d24a50fa23d71f543501168bfe9b3a859c83cd
|
|
@ -102,3 +102,15 @@ class TestCChardet():
|
|||
except LookupError as e:
|
||||
print("LookupError: { file=%s, encoding=%s }" % (testfile, detected_encoding["encoding"]))
|
||||
raise e
|
||||
|
||||
def test_utf8_with_bom(self):
    # Regression check: input consisting solely of the three-byte UTF-8
    # byte-order mark must be reported as "utf-8-sig" (compared
    # case-insensitively by lower-casing the detector's answer).
    expected = "utf-8-sig"
    bom_only = b'\xEF\xBB\xBF'
    result = cchardet.detect(bom_only)
    actual = result['encoding'].lower()
    # The failure message names both the expected and the detected encoding.
    eq_(expected, actual, 'Expected %s, but got %s' % (expected, actual))
|
||||
|
|
Loading…
Reference in a new issue