From fb19f59f93146405e766e26cce58d9a16eaf0d6b Mon Sep 17 00:00:00 2001 From: PyYoshi Date: Sat, 7 Jul 2012 11:31:10 +0900 Subject: [PATCH] Fix bug where an invalid confidence value was sometimes output. --- src/cchardet/cchardet.pyx | 3 +-- .../universalchardet/src/base/nsUniversalDetector.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cchardet/cchardet.pyx b/src/cchardet/cchardet.pyx index 02cb15b..c506133 100644 --- a/src/cchardet/cchardet.pyx +++ b/src/cchardet/cchardet.pyx @@ -33,11 +33,10 @@ def detect(char *msg): return csd_close(csd) def detect_with_confidence(char *msg): - # TODO: fix "sometimes, to output invalid confidence value" bug. cdef csd_t csd = csd_open() cdef int length = strlen(msg) cdef int result = csd_consider(csd, msg, length) - cdef float confidence + cdef float confidence = 0.0 cdef const_char_ptr detected_charset # ref: charsetdetect.cpp if result == -1: # Error, signal with a negative number diff --git a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp index d9e6bc4..8993aae 100644 --- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp +++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp @@ -45,6 +45,8 @@ #include "nsEscCharsetProber.h" #include "nsLatin1Prober.h" +#include + nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter) { mDone = PR_FALSE; @@ -292,6 +294,8 @@ void nsUniversalDetector::DataEnd2(float *confidence) { mDone = PR_TRUE; Report(mDetectedCharset); + // no computed confidence on this path; report a fixed value + *confidence = (float)0.99; return; } @@ -316,9 +320,10 @@ void nsUniversalDetector::DataEnd2(float *confidence) } } //do not report anything because we are not confident of it, that's in fact a negative answer - if (maxProberConfidence > 
MINIMUM_THRESHOLD){ Report(mCharSetProbers[maxProber]->GetCharSetName()); *confidence = maxProberConfidence; + } } break; case eEscAscii: