cChardet/bin/cchardetect

45 lines
1.3 KiB
Text
Raw Permalink Normal View History

#!/usr/bin/env python
from __future__ import print_function, unicode_literals
import argparse
2017-05-15 08:51:37 +08:00
import sys
import cchardet
2017-05-15 08:51:37 +08:00
# True when running under a Python 2 interpreter; used to pick a
# bytes-capable stdin stream.
PY2 = sys.version_info[0] == 2
def read_chunks(f, chunk_size):
    """Yield successive chunks read from the file-like object *f*.

    Args:
        f: Any object exposing ``read(size)``.
        chunk_size: Size passed to each ``f.read`` call.

    Yields:
        Each non-empty value returned by ``f.read(chunk_size)``, in order.
        Iteration stops at the first empty (falsy) read, which is how
        file objects signal end of stream.
    """
    while True:
        chunk = f.read(chunk_size)
        if not chunk:
            # Empty read: the stream is exhausted.
            return
        yield chunk
def main():
    """Command-line entry point: print the detected encoding of each input.

    Positional ``files`` are opened in binary mode by argparse; when none
    are given, standard input is read instead. Each input is fed to a
    ``cchardet.UniversalDetector`` in pieces of ``--chunk-size`` bytes
    (default 256 KiB) and one result line per input is printed to stdout.
    """
    # The detector is fed bytes, so use the binary stdin stream: on
    # Python 3 that is ``sys.stdin.buffer``; on Python 2 ``sys.stdin``
    # is used directly.
    stdin_bytes = sys.stdin if PY2 else sys.stdin.buffer

    parser = argparse.ArgumentParser()
    parser.add_argument('files',
                        nargs='*',
                        help="Files to detect encoding of",
                        type=argparse.FileType('rb'),
                        default=[stdin_bytes])
    parser.add_argument('--chunk-size',
                        type=int,
                        default=(256 * 1024))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {0}'.format(cchardet.__version__))
    args = parser.parse_args()

    # Standard input is owned by the interpreter; never close it here.
    keep_open = (sys.stdin, stdin_bytes)

    for f in args.files:
        detector = cchardet.UniversalDetector()
        try:
            for chunk in read_chunks(f, args.chunk_size):
                detector.feed(chunk)
        finally:
            # argparse.FileType opened this handle for us; release it as
            # soon as the file has been consumed instead of leaking it
            # until interpreter exit.
            if not any(f is stream for stream in keep_open):
                f.close()
        detector.close()
        print('{file.name}: {result[encoding]} with confidence {result[confidence]}'.format(
            file=f,
            result=detector.result
        ))
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()