From 27f61923be5bf49198f146b6121c81f593351cdb Mon Sep 17 00:00:00 2001
From: PyYoshi <yohihiro.m@gmail.com>
Date: Thu, 5 Jul 2012 12:05:11 +0900
Subject: [PATCH] add "cchardet.detect_with_confidence" method.

---
 readme.md                                     |   2 +
 setup.py                                      |   3 +-
 src/cchardet/__init__.py                      |  22 ++
 src/cchardet/cchardet.pyx                     |  27 +++
 src/ext/libcharsetdetect/charsetdetect.cpp    |  26 ++-
 src/ext/libcharsetdetect/charsetdetect.h      |   4 +-
 src/ext/libcharsetdetect/charsetdetectPriv.h  |   2 +
 .../src/base/nsUniversalDetector.cpp          |  53 ++++-
 .../src/base/nsUniversalDetector.h            |   2 +
 test/tests.py                                 | 193 +++++++++---------
 10 files changed, 235 insertions(+), 99 deletions(-)

diff --git a/readme.md b/readme.md
index 6d97606..51c6b09 100644
--- a/readme.md
+++ b/readme.md
@@ -61,6 +61,8 @@ import cchardet
 msg = file(r"test/testdata/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt").read()
 result = cchardet.detect(msg)
 print(result)
+result2 = cchardet.detect_with_confidence(msg)
+print(result2)
 ```
 
 # Test
diff --git a/setup.py b/setup.py
index 991e6c5..aea9df6 100644
--- a/setup.py
+++ b/setup.py
@@ -82,7 +82,8 @@ setup(
     long_description= """This library is high speed universal character encoding detector. - binding to charsetdetect.
 This library is faster than chardet.
 """,
-    version = '0.1',
+    version = '0.2',
+    license = 'MIT License',
     classifiers = [ # http://pypi.python.org/pypi?:action=list_classifiers
                     'Development Status :: 1 - Planning',
                     'License :: OSI Approved :: MIT License',
diff --git a/src/cchardet/__init__.py b/src/cchardet/__init__.py
index 3a29029..1b7fcab 100644
--- a/src/cchardet/__init__.py
+++ b/src/cchardet/__init__.py
@@ -4,4 +4,26 @@
 from cchardet import _cchardet
 
 def detect(msg):
+    """Detect the character encoding of ``msg``.
+
+    Args:
+        msg: str
+    Returns:
+        charset: str (the value returned by _cchardet.detect)
+    Warnings: UserWarning
+    """
     return _cchardet.detect(msg)
+
+def detect_with_confidence(msg):
+    """Detect the character encoding of ``msg`` along with a confidence.
+
+    Args:
+        msg: str
+    Returns:
+        dict of the form {"encoding": str, "confidence": float}
+    Warnings:
+        UserWarning
+    """
+    # Thin wrapper: the detection itself is delegated to _cchardet.
+    result = _cchardet.detect_with_confidence(msg)
+    return result
\ No newline at end of file
diff --git a/src/cchardet/cchardet.pyx b/src/cchardet/cchardet.pyx
index 5483d73..f59b138 100644
--- a/src/cchardet/cchardet.pyx
+++ b/src/cchardet/cchardet.pyx
@@ -17,6 +17,7 @@ cdef extern from "charsetdetect.h":
     cdef csd_t csd_open()
     cdef int csd_consider(csd_t csd, char* data, int length)
     cdef const_char_ptr csd_close(csd_t csd)
+    cdef const_char_ptr csd_close2(csd_t csd, float *confidence)
 
 def detect(char *msg):
     cdef csd_t csd = csd_open()
@@ -30,3 +31,29 @@ def detect(char *msg):
         return csd_close(csd)
     elif result == 0: # Detected early
         return csd_close(csd)
+
+def detect_with_confidence(char *msg):
+    """Detect the charset of ``msg`` and report the detector's confidence.
+
+    Returns ``{"encoding": ..., "confidence": float}``; ``encoding`` is None
+    when no charset was detected. Raises Exception when csd_consider() fails
+    and emits UserWarning when the detector wanted more data.
+    """
+    cdef csd_t csd = csd_open()
+    cdef int length = strlen(msg)
+    cdef int result = csd_consider(csd, msg, length)
+    # Zero-initialised: csd_close2() does not write it on every path.
+    cdef float confidence = 0.0
+    # Close first, on every path: csd_close2() is what frees the detector.
+    cdef const_char_ptr detected_charset = csd_close2(csd, &confidence)
+    # ref: charsetdetect.cpp
+    if result == -1: # Error, signal with a negative number
+        raise Exception("Error, signal with a negative number")
+    elif result == 1: # Need more data
+        warnings.warn("Need more data",UserWarning)
+    elif result != 0: # Unknown status: no result, as before
+        return None
+    if detected_charset == NULL: # NULL cannot be coerced to a Python string
+        return {"encoding":None, "confidence":confidence}
+    return {"encoding":detected_charset, "confidence":confidence}
+
diff --git a/src/ext/libcharsetdetect/charsetdetect.cpp b/src/ext/libcharsetdetect/charsetdetect.cpp
index 7b9de6d..4117652 100644
--- a/src/ext/libcharsetdetect/charsetdetect.cpp
+++ b/src/ext/libcharsetdetect/charsetdetect.cpp
@@ -48,7 +48,6 @@ const char *Detector::Close(void) {
 	return mDetectedCharset;
 }
 
-
 //
 // C API to the character set detector (we actually export this)
 //
@@ -68,3 +67,28 @@ const char *csd_close(csd_t csd) {
     delete ((Detector*)csd);
     return result;
 }
+
+// Finishes detection through DataEnd2(), which may store a confidence in
+// *confidence, then returns the detected charset name (NULL when unknown).
+const char *Detector::Close2(float *confidence) {
+    DataEnd2(confidence);
+
+    if (mDone) {
+        return mDetectedCharset;
+    }
+    // Nothing was reported: fall back on the kind of input that was seen.
+    if (mInputState == eEscAscii) {
+        return "ibm850";
+    }
+    if (mInputState == ePureAscii) {
+        return "ASCII";
+    }
+    return NULL;
+}
+
+const char *csd_close2(csd_t csd, float *confidence) {
+    Detector *detector = (Detector*)csd;  // the handle is used as a Detector
+    const char *charset = detector->Close2(confidence);
+    delete detector;  // closing always destroys the detector
+    return charset;
+}
\ No newline at end of file
diff --git a/src/ext/libcharsetdetect/charsetdetect.h b/src/ext/libcharsetdetect/charsetdetect.h
index c93f2e0..6cfd1d0 100644
--- a/src/ext/libcharsetdetect/charsetdetect.h
+++ b/src/ext/libcharsetdetect/charsetdetect.h
@@ -21,7 +21,9 @@ int csd_consider(csd_t csd, const char *data, int length);
 // Closes the character set detector and returns the detected character set name as an ASCII string.
 // Returns NULL if detection failed.
 const char *csd_close(csd_t csd);
-    
+
+// Variant of csd_close that additionally takes an out-parameter for the detection confidence.
+const char *csd_close2(csd_t csd,float *confidence);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ext/libcharsetdetect/charsetdetectPriv.h b/src/ext/libcharsetdetect/charsetdetectPriv.h
index 50a9d55..c3c2ecc 100644
--- a/src/ext/libcharsetdetect/charsetdetectPriv.h
+++ b/src/ext/libcharsetdetect/charsetdetectPriv.h
@@ -11,6 +11,8 @@ public:
 	Detector(PRUint32 aLanguageFilter) : nsUniversalDetector(aLanguageFilter) {};
     int Consider(const char *data, int length);
     const char *Close(void);
+
+    const char *Close2(float *confidence);
 protected:
 	void Report(const char* aCharset);
     const char *mDetectedCharset;
diff --git a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
index f3c63d2..d9e6bc4 100644
--- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
+++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
@@ -228,7 +228,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
   return NS_OK;
 }
 
-
 //---------------------------------------------------------------------
 void nsUniversalDetector::DataEnd()
 {
@@ -278,3 +277,55 @@ void nsUniversalDetector::DataEnd()
   }
   return;
 }
+
+// End-of-input handling that also stores a confidence in *confidence (0 when none is computed).
+void nsUniversalDetector::DataEnd2(float *confidence)
+{
+  // Define the out-parameter up front: the early returns below never set it.
+  if (confidence)
+    *confidence = 0.0f;
+  if (!mGotData)
+  {
+    // we haven't got any data yet, return immediately
+    // callers sometimes call DataEnd before anything has been sent to detector
+    return;
+  }
+
+  if (mDetectedCharset)
+  {
+    // NOTE(review): no confidence figure is available here, so it stays 0.
+    mDone = PR_TRUE;
+    Report(mDetectedCharset);
+    return;
+  }
+
+  switch (mInputState)
+  {
+  case eHighbyte:
+    {
+      float maxProberConfidence = (float)0.0;
+      PRInt32 maxProber = 0;
+      for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
+      {
+        if (!mCharSetProbers[i])
+          continue;
+        const float proberConfidence = mCharSetProbers[i]->GetConfidence();
+        if (proberConfidence > maxProberConfidence)
+        {
+          maxProberConfidence = proberConfidence;
+          maxProber = i;
+        }
+      }
+      //do not report anything (charset or confidence) because we are not confident of it, that's in fact a negative answer
+      if (maxProberConfidence > MINIMUM_THRESHOLD)
+      {
+        Report(mCharSetProbers[maxProber]->GetCharSetName());
+        if (confidence)
+          *confidence = maxProberConfidence;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+}
\ No newline at end of file
diff --git a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.h b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.h
index 525f722..598b813 100644
--- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.h
+++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.h
@@ -68,6 +68,8 @@ public:
    virtual nsresult HandleData(const char* aBuf, PRUint32 aLen);
    virtual void DataEnd(void);
 
+   virtual void DataEnd2(float *confidence);
+
 protected:
    virtual void Report(const char* aCharset) = 0;
    virtual void Reset();
diff --git a/test/tests.py b/test/tests.py
index da2b3f0..9a55513 100644
--- a/test/tests.py
+++ b/test/tests.py
@@ -48,369 +48,369 @@ class TestCchardet():
         encoding = "ISO-8859-5"
         path = r"testdata/bg/ISO-8859-5/wikitop_bg_ISO-8859-5.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_bg_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/bg/UTF-8/wikitop_bg_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_bg_windows1251(self):
         encoding = "WINDOWS-1251"
         path = r"testdata/bg/WINDOWS-1251/wikitop_bg_WINDOWS-1251.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_cz_iso88592(self):
         encoding = "ISO-8859-2"
         path = r"testdata/cz/ISO-8859-2/wikitop_cz_ISO-8859-2.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_cz_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/cz/UTF-8/wikitop_cz_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_de_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/de/UTF-8/wikitop_de_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_de_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/de/WINDOWS-1252/wikitop_de_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_dk_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/dk/UTF-8/wikitop_dk_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_dk_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/dk/WINDOWS-1252/wikitop_dk_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_el_iso88597(self):
         encoding = "ISO-8859-7"
         path = r"testdata/el/ISO-8859-7/wikitop_el_ISO-8859-7.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_el_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/el/UTF-8/wikitop_el_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_en_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/en/UTF-8/wikitop_en_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_en_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/en/WINDOWS-1252/wikitop_en_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_es_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/es/UTF-8/wikitop_es_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_es_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/es/WINDOWS-1252/wikitop_es_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_fi_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/fi/UTF-8/wikitop_fi_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_fi_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/fi/WINDOWS-1252/wikitop_fi_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_fr_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/fr/UTF-8/wikitop_fr_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_fr_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/fr/WINDOWS-1252/wikitop_fr_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_he_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/he/UTF-8/wikitop_he_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_he_windows1255(self):
         encoding = "WINDOWS-1255"
         path = r"testdata/he/WINDOWS-1255/wikitop_he_WINDOWS-1255.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_hu_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/hu/UTF-8/wikitop_hu_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_hu_iso55892(self):
         encoding = "ISO-8859-2"
         path = r"testdata/hu/ISO-8859-2/wikitop_hu_ISO-8859-2.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_it_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/it/UTF-8/wikitop_it_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_it_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/it/WINDOWS-1252/wikitop_it_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_nl_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/nl/UTF-8/wikitop_nl_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_nl_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/nl/WINDOWS-1252/wikitop_nl_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_no_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/no/UTF-8/wikitop_no_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_no_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/no/WINDOWS-1252/wikitop_no_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_pl_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/pl/UTF-8/wikitop_pl_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_pl_iso88592(self):
         encoding = "ISO-8859-2"
         path = r"testdata/pl/ISO-8859-2/wikitop_pl_ISO-8859-2.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_pt_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/pt/UTF-8/wikitop_pt_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_pt_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/pt/WINDOWS-1252/wikitop_pt_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_ru_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/ru/UTF-8/wikitop_ru_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_ru_windows1251(self):
         encoding = "WINDOWS-1251"
         path = r"testdata/ru/WINDOWS-1251/wikitop_ru_WINDOWS-1251.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_ru_ibm855(self):
         encoding = "IBM855"
         path = r"testdata/ru/IBM855/wikitop_ru_IBM855.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_ru_koi8r(self):
         encoding = "KOI8-R"
         path = r"testdata/ru/KOI8-R/wikitop_ru_KOI8-R.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_se_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/se/UTF-8/wikitop_se_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_se_windows1252(self):
         encoding = "WINDOWS-1252"
         path = r"testdata/se/WINDOWS-1252/wikitop_se_WINDOWS-1252.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_th_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/th/UTF-8/wikitop_th_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_th_tis620_1(self):
         encoding = "TIS-620"
         path = r"testdata/th/TIS-620/utffool_th_TIS-620.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_th_tis620_2(self):
         encoding = "TIS-620"
         path = r"testdata/th/TIS-620/wikitop_th_TIS-620.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_tr_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/tr/UTF-8/wikitop_tr_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_tr_iso88599(self):
         encoding = "ISO-8859-9"
         path = r"testdata/tr/ISO-8859-9/wikitop_tr_ISO-8859-9.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_zh_utf8(self):
         encoding = "UTF-8"
         path = r"testdata/zh/UTF-8/wikitop_zh_UTF-8.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
     def test_detect_zh_gb18030(self):
         encoding = "GB18030"
         path = r"testdata/zh/GB18030/wikitop_zh_GB18030.txt"
         msg =file(path).read()
-        detected_encoding = cchardet.detect(msg)
+        detected_encoding = cchardet.detect_with_confidence(msg)
         print(detected_encoding)
-        eq_(encoding.lower(),detected_encoding.lower())
+        eq_(encoding.lower(),detected_encoding['encoding'].lower())
 
 class TestCchardetSpeed():
     def test_speed(self):
@@ -424,10 +424,13 @@ class TestCchardetSpeed():
         end_chardet = time.time()
         # Test cchardet
         start_cchardet = time.time()
-        detected_encoding_cchardet = cchardet.detect(msg)
+        detected_encoding_cchardet = cchardet.detect_with_confidence(msg)
         end_cchardet = time.time()
         # print result
         result_chardet = end_chardet - start_chardet
         result_cchardet = end_cchardet - start_cchardet
-        print("chardet:",result_chardet,"detected charset:", detected_encoding_chardet['encoding'].lower())
-        print("cchardet:",result_cchardet,"detected charset:",detected_encoding_cchardet.lower())
\ No newline at end of file
+        print()
+        print(detected_encoding_cchardet)
+        print()
+        print("chardet:",result_chardet,"detected charset:", detected_encoding_chardet['encoding'].lower(), "confidence:", detected_encoding_chardet['confidence'])
+        print("cchardet:",result_cchardet,"detected charset:",detected_encoding_cchardet['encoding'].lower(), "confidence:", detected_encoding_cchardet['confidence'])