2017-05-10 07:15:42 +08:00
|
|
|
#!/usr/bin/env python
|
|
|
|
from __future__ import print_function, unicode_literals
|
|
|
|
import argparse
|
2017-05-15 08:51:37 +08:00
|
|
|
import sys
|
2017-05-10 07:15:42 +08:00
|
|
|
import cchardet
|
|
|
|
|
|
|
|
|
2017-05-15 08:51:37 +08:00
|
|
|
# True when the interpreter's major version is 2; main() uses this to decide
# between sys.stdin and sys.stdin.buffer as the default input stream.
PY2 = sys.version_info[0] == 2
|
|
|
|
|
2017-05-10 07:15:42 +08:00
|
|
|
def read_chunks(f, chunk_size):
    """Yield successive pieces of ``f`` obtained via ``f.read(chunk_size)``.

    Iteration ends at the first read that returns a falsy (empty) value;
    that empty value itself is not yielded.
    """
    while True:
        block = f.read(chunk_size)
        if not block:
            break
        yield block
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: print the detected encoding of each input.

    Arguments parsed from the command line:
      files         zero or more files, opened in binary mode by argparse;
                    when none are given, standard input is used.
      --chunk-size  number of bytes requested per read (default 256 * 1024).
      --version     print the program name and cchardet version, then exit.

    Each file is fed chunk by chunk to a fresh ``cchardet.UniversalDetector``
    and one line with the file name, encoding and confidence is printed.
    """
    # On Python 3 the binary view of stdin lives at sys.stdin.buffer.
    default_stdin = sys.stdin if PY2 else sys.stdin.buffer

    parser = argparse.ArgumentParser()
    parser.add_argument('files',
                        nargs='*',
                        help="Files to detect encoding of",
                        type=argparse.FileType('rb'),
                        default=[default_stdin])
    parser.add_argument('--chunk-size',
                        type=int,
                        default=(256 * 1024))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {0}'.format(cchardet.__version__))
    args = parser.parse_args()

    for f in args.files:
        try:
            detector = cchardet.UniversalDetector()
            for chunk in read_chunks(f, args.chunk_size):
                detector.feed(chunk)
            detector.close()
            print('{file.name}: {result[encoding]} with confidence {result[confidence]}'.format(
                file=f,
                result=detector.result
            ))
        finally:
            # argparse.FileType opened every named file while parsing; release
            # each handle as soon as it has been processed instead of holding
            # all of them until exit. Standard input is left open.
            if f is not default_stdin and f is not sys.stdin:
                f.close()
|
|
|
|
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|