Fix nsUniversalDetector overlooking the UTF-16 BOM.

2014-04-03 12:02:52 +09:00 · 2014-04-03 12:02:52 +09:00 · 9c417c0755
commit 9c417c0755
parent ea9a21be1e
1 changed file with 24 additions and 21 deletions
--- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
+++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
@@ -111,26 +111,29 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
  //If the data starts with BOM, we know it is UTF
  if (mStart)
  {
-    mStart = PR_FALSE;
-    if (aLen > 2)
-      switch (aBuf[0])
-        {
+    mStart = false;
+    if (aLen >= 2) {
+      switch (aBuf[0]) {
      case '\xEF':
-          if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
+        if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
          // EF BB BF  UTF-8 encoded BOM
          mDetectedCharset = "UTF-8";
+        }
        break;
      case '\xFE':
-          if ('\xFF' == aBuf[1])
+        if ('\xFF' == aBuf[1]) {
          // FE FF  UTF-16, big endian BOM
-            mDetectedCharset = "UTF-16";
+          mDetectedCharset = "UTF-16BE";
+        }
        break;
      case '\xFF':
-          if ('\xFE' == aBuf[1])
+        if ('\xFE' == aBuf[1]) {
          // FF FE  UTF-16, little endian BOM
-            mDetectedCharset = "UTF-16";
+          mDetectedCharset = "UTF-16LE";
+        }
        break;
      }  // switch
+    }

    if (mDetectedCharset)
    {