diff --git a/src/cchardet/_cchardet.pyx b/src/cchardet/_cchardet.pyx
index db9be40..f03f1f9 100644
--- a/src/cchardet/_cchardet.pyx
+++ b/src/cchardet/_cchardet.pyx
@@ -9,21 +9,29 @@ cdef extern from "charsetdetect.h":
 
 def detect_with_confidence(char *msg):
     cdef csd_t csd = csd_open()
+
+    # Use the length the bytes object already stores instead of recounting it with strlen (see the CPython links below).
+    # NOTE(review): msg is declared char* here; confirm that Cython's len() on a char* does not itself lower to strlen.
+    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Include/bytesobject.h#L82
+    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Objects/bytesobject.c#L2490
+    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Include/object.h#L346
+    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Objects/bytesobject.c#L2410
     cdef int length = len(msg)
+
     cdef int result = csd_consider(csd, msg, length)
     cdef float confidence = 0.0
     cdef const_char_ptr detected_charset
-    # ref: charsetdetect.cpp
+
     if result == 1: # Need more data
         detected_charset = csd_close2(csd, &confidence)
     elif result == 0: # Detected early
         detected_charset = csd_close2(csd, &confidence)
     else: # Error, signal with a negative number
         raise Exception("Error, signal with a negative number")
+
     if detected_charset:
         return detected_charset, confidence
-    else:
-        return None, None
+    return None, None
 
 cdef class Detector:
     cdef csd_t csd
@@ -70,5 +78,4 @@ cdef class Detector:
     def result(self):
         if len(self._detected_charset):
             return self._detected_charset, self._confidence
-        else:
-            return None, None
+        return None, None