Fixed the bug where an invalid confidence value was sometimes output.
This commit is contained in:
parent
0627b9183a
commit
fb19f59f93
2 changed files with 7 additions and 3 deletions
|
@ -33,11 +33,10 @@ def detect(char *msg):
|
||||||
return csd_close(csd)
|
return csd_close(csd)
|
||||||
|
|
||||||
def detect_with_confidence(char *msg):
|
def detect_with_confidence(char *msg):
|
||||||
# TODO: fix "sometimes, to output invalid confidence value" bug.
|
|
||||||
cdef csd_t csd = csd_open()
|
cdef csd_t csd = csd_open()
|
||||||
cdef int length = strlen(msg)
|
cdef int length = strlen(msg)
|
||||||
cdef int result = csd_consider(csd, msg, length)
|
cdef int result = csd_consider(csd, msg, length)
|
||||||
cdef float confidence
|
cdef float confidence = 0.0
|
||||||
cdef const_char_ptr detected_charset
|
cdef const_char_ptr detected_charset
|
||||||
# ref: charsetdetect.cpp
|
# ref: charsetdetect.cpp
|
||||||
if result == -1: # Error, signal with a negative number
|
if result == -1: # Error, signal with a negative number
|
||||||
|
|
|
@ -45,6 +45,8 @@
|
||||||
#include "nsEscCharsetProber.h"
|
#include "nsEscCharsetProber.h"
|
||||||
#include "nsLatin1Prober.h"
|
#include "nsLatin1Prober.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
|
nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
|
||||||
{
|
{
|
||||||
mDone = PR_FALSE;
|
mDone = PR_FALSE;
|
||||||
|
@ -292,6 +294,8 @@ void nsUniversalDetector::DataEnd2(float *confidence)
|
||||||
{
|
{
|
||||||
mDone = PR_TRUE;
|
mDone = PR_TRUE;
|
||||||
Report(mDetectedCharset);
|
Report(mDetectedCharset);
|
||||||
|
// no confidence
|
||||||
|
*confidence = (float)0.99;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,9 +320,10 @@ void nsUniversalDetector::DataEnd2(float *confidence)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//do not report anything because we are not confident of it, that's in fact a negative answer
|
//do not report anything because we are not confident of it, that's in fact a negative answer
|
||||||
if (maxProberConfidence > MINIMUM_THRESHOLD)
|
if (maxProberConfidence > MINIMUM_THRESHOLD){
|
||||||
Report(mCharSetProbers[maxProber]->GetCharSetName());
|
Report(mCharSetProbers[maxProber]->GetCharSetName());
|
||||||
*confidence = maxProberConfidence;
|
*confidence = maxProberConfidence;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case eEscAscii:
|
case eEscAscii:
|
||||||
|
|
Loading…
Reference in a new issue