From 519f6bdda850ddecc7641b1435449dfd1c64aa60 Mon Sep 17 00:00:00 2001 From: PyYoshi Date: Tue, 18 Oct 2016 09:49:24 +0900 Subject: [PATCH] update comments --- src/cchardet/_cchardet.pyx | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/cchardet/_cchardet.pyx b/src/cchardet/_cchardet.pyx index db9be40..f03f1f9 100644 --- a/src/cchardet/_cchardet.pyx +++ b/src/cchardet/_cchardet.pyx @@ -9,21 +9,29 @@ cdef extern from "charsetdetect.h": def detect_with_confidence(char *msg): cdef csd_t csd = csd_open() + + # すでにカウント済みの長さへアクセス + # strlenでは再度カウントすることになる + # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Include/bytesobject.h#L82 + # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Objects/bytesobject.c#L2490 + # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Include/object.h#L346 + # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Objects/bytesobject.c#L2410 cdef int length = len(msg) + cdef int result = csd_consider(csd, msg, length) cdef float confidence = 0.0 cdef const_char_ptr detected_charset - # ref: charsetdetect.cpp + if result == 1: # Need more data detected_charset = csd_close2(csd, &confidence) elif result == 0: # Detected early detected_charset = csd_close2(csd, &confidence) else: # Error, signal with a negative number raise Exception("Error, signal with a negative number") + if detected_charset: return detected_charset, confidence - else: - return None, None + return None, None cdef class Detector: cdef csd_t csd @@ -70,5 +78,4 @@ cdef class Detector: def result(self): if len(self._detected_charset): return self._detected_charset, self._confidence - else: - return None, None + return None, None