Fix nsUniversalDetector overlooking the UTF-16 BOM.

2014-04-03 12:02:52 +09:00 · 2014-04-03 12:02:52 +09:00 · 9c417c0755
commit 9c417c0755
parent ea9a21be1e
1 changed file with 24 additions and 21 deletions
--- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
+++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
@@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
  //If the data starts with BOM, we know it is UTF
  if (mStart)
  {
-    mStart = PR_FALSE;
-    if (aLen > 2)
-      switch (aBuf[0])
-        {
-        case '\xEF':
-          if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
-            // EF BB BF  UTF-8 encoded BOM
-            mDetectedCharset = "UTF-8";
+    mStart = false;
+    if (aLen >= 2) {
+      switch (aBuf[0]) {
+      case '\xEF':
+        if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
+          // EF BB BF  UTF-8 encoded BOM
+          mDetectedCharset = "UTF-8";
+        }
        break;
-        case '\xFE':
-          if ('\xFF' == aBuf[1])
-            // FE FF  UTF-16, big endian BOM
-            mDetectedCharset = "UTF-16";
+      case '\xFE':
+        if ('\xFF' == aBuf[1]) {
+          // FE FF  UTF-16, big endian BOM
+          mDetectedCharset = "UTF-16BE";
+        }
        break;
-        case '\xFF':
-          if ('\xFE' == aBuf[1])
-            // FF FE  UTF-16, little endian BOM
-            mDetectedCharset = "UTF-16";
+      case '\xFF':
+        if ('\xFE' == aBuf[1]) {
+          // FF FE  UTF-16, little endian BOM
+          mDetectedCharset = "UTF-16LE";
+        }
        break;
      }  // switch
+    }

-      if (mDetectedCharset)
-      {
-        mDone = PR_TRUE;
-        return NS_OK;
-      }
+    if (mDetectedCharset)
+    {
+      mDone = PR_TRUE;
+      return NS_OK;
+    }
  }
  
  PRUint32 i;