update comments

This commit is contained in:
PyYoshi 2016-10-18 09:49:24 +09:00
parent 02715bb643
commit 519f6bdda8

View file

@ -9,21 +9,29 @@ cdef extern from "charsetdetect.h":
def detect_with_confidence(char *msg):
    """Detect the character encoding of ``msg`` in one shot.

    Opens a detector handle with ``csd_open``, feeds the whole buffer to
    ``csd_consider`` and closes the handle with ``csd_close2``.

    :param msg: C string holding the data to analyse.
    :returns: ``(charset, confidence)`` when ``csd_close2`` returns a
        non-NULL charset name, otherwise ``(None, None)``.
    :raises Exception: when ``csd_consider`` returns anything other than
        0 or 1.
    """
    cdef csd_t csd = csd_open()
    # Original author's intent: read the length that the bytes object has
    # already counted, because strlen would count it a second time.
    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Include/bytesobject.h#L82
    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Objects/bytesobject.c#L2490
    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Include/object.h#L346
    # https://github.com/python/cpython/blob/c30098c8c6014f3340a369a31df9c74bdbacc269/Objects/bytesobject.c#L2410
    # NOTE(review): ``msg`` is declared ``char *``, and Cython is documented
    # to compile ``len()`` on a ``char *`` to ``strlen``. If so, the length is
    # recounted here and input with embedded NUL bytes is truncated --
    # confirm against the generated C before relying on the comment above.
    cdef int length = len(msg)
    cdef int result = csd_consider(csd, msg, length)
    cdef float confidence = 0.0
    cdef const_char_ptr detected_charset

    # ref: charsetdetect.cpp
    if result != 0 and result != 1:
        # Error, signal with a negative number.
        # Close the handle before raising so it is not leaked on this path;
        # presumably csd_close2 releases it, as on the success path --
        # confirm against charsetdetect.cpp.
        csd_close2(csd, &confidence)
        raise Exception("Error, signal with a negative number")

    # result == 1: need more data; result == 0: detected early.
    # Both cases finish the same way: close the handle and read the verdict.
    detected_charset = csd_close2(csd, &confidence)
    if detected_charset:
        return detected_charset, confidence
    return None, None
cdef class Detector: cdef class Detector:
cdef csd_t csd cdef csd_t csd
@ -70,5 +78,4 @@ cdef class Detector:
def result(self):
    """Return the stored detection verdict.

    :returns: ``(self._detected_charset, self._confidence)`` when the stored
        charset has non-zero length, otherwise ``(None, None)``.
    """
    # Guard clause: an empty charset means there is nothing to report.
    if not len(self._detected_charset):
        return None, None
    return self._detected_charset, self._confidence