Fixed the bug where an invalid confidence value was sometimes output.

This commit is contained in:
PyYoshi 2012-07-07 11:31:10 +09:00
parent 0627b9183a
commit fb19f59f93
2 changed files with 7 additions and 3 deletions

View file

@ -33,11 +33,10 @@ def detect(char *msg):
return csd_close(csd) return csd_close(csd)
def detect_with_confidence(char *msg): def detect_with_confidence(char *msg):
# TODO: fix "sometimes, to output invalid confidence value" bug.
cdef csd_t csd = csd_open() cdef csd_t csd = csd_open()
cdef int length = strlen(msg) cdef int length = strlen(msg)
cdef int result = csd_consider(csd, msg, length) cdef int result = csd_consider(csd, msg, length)
cdef float confidence cdef float confidence = 0.0
cdef const_char_ptr detected_charset cdef const_char_ptr detected_charset
# ref: charsetdetect.cpp # ref: charsetdetect.cpp
if result == -1: # Error, signal with a negative number if result == -1: # Error, signal with a negative number

View file

@ -45,6 +45,8 @@
#include "nsEscCharsetProber.h" #include "nsEscCharsetProber.h"
#include "nsLatin1Prober.h" #include "nsLatin1Prober.h"
#include <iostream>
nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter) nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
{ {
mDone = PR_FALSE; mDone = PR_FALSE;
@ -292,6 +294,8 @@ void nsUniversalDetector::DataEnd2(float *confidence)
{ {
mDone = PR_TRUE; mDone = PR_TRUE;
Report(mDetectedCharset); Report(mDetectedCharset);
// no per-prober confidence is computed on this path; use a fixed value
*confidence = (float)0.99;
return; return;
} }
@ -316,10 +320,11 @@ void nsUniversalDetector::DataEnd2(float *confidence)
} }
} }
//do not report anything because we are not confident of it, that's in fact a negative answer //do not report anything because we are not confident of it, that's in fact a negative answer
if (maxProberConfidence > MINIMUM_THRESHOLD) if (maxProberConfidence > MINIMUM_THRESHOLD){
Report(mCharSetProbers[maxProber]->GetCharSetName()); Report(mCharSetProbers[maxProber]->GetCharSetName());
*confidence = maxProberConfidence; *confidence = maxProberConfidence;
} }
}
break; break;
case eEscAscii: case eEscAscii:
break; break;