Fixed the "sometimes outputs an invalid confidence value" bug.

This commit is contained in:
PyYoshi 2012-07-07 11:31:10 +09:00
parent 0627b9183a
commit fb19f59f93
2 changed files with 7 additions and 3 deletions

View file

@ -33,11 +33,10 @@ def detect(char *msg):
return csd_close(csd)
def detect_with_confidence(char *msg):
# TODO: fix "sometimes, to output invalid confidence value" bug.
cdef csd_t csd = csd_open()
cdef int length = strlen(msg)
cdef int result = csd_consider(csd, msg, length)
cdef float confidence
cdef float confidence = 0.0
cdef const_char_ptr detected_charset
# ref: charsetdetect.cpp
if result == -1: # Error, signal with a negative number

View file

@ -45,6 +45,8 @@
#include "nsEscCharsetProber.h"
#include "nsLatin1Prober.h"
#include <iostream>
nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
{
mDone = PR_FALSE;
@ -292,6 +294,8 @@ void nsUniversalDetector::DataEnd2(float *confidence)
{
mDone = PR_TRUE;
Report(mDetectedCharset);
// no measured confidence on this path; report a fixed value of 0.99
*confidence = (float)0.99;
return;
}
@ -316,9 +320,10 @@ void nsUniversalDetector::DataEnd2(float *confidence)
}
}
//do not report anything because we are not confident of it, that's in fact a negative answer
if (maxProberConfidence > MINIMUM_THRESHOLD)
if (maxProberConfidence > MINIMUM_THRESHOLD){
Report(mCharSetProbers[maxProber]->GetCharSetName());
*confidence = maxProberConfidence;
}
}
break;
case eEscAscii: