From 9c417c0755c66b45d41f2916b169247199710a23 Mon Sep 17 00:00:00 2001
From: PyYoshi <myoshi321go@gmail.com>
Date: Thu, 3 Apr 2014 12:02:52 +0900
Subject: [PATCH] Fix nsUniversalDetector overlooking a UTF-16 BOM in 2-byte input; report UTF-16BE/UTF-16LE.

---
 .../src/base/nsUniversalDetector.cpp          | 45 ++++++++++---------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
index 8993aae..3e34ded 100644
--- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
+++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp
@@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
   //If the data starts with BOM, we know it is UTF
   if (mStart)
   {
-    mStart = PR_FALSE;
-    if (aLen > 2)
-      switch (aBuf[0])
-        {
-        case '\xEF':
-          if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
-            // EF BB BF  UTF-8 encoded BOM
-            mDetectedCharset = "UTF-8";
+    mStart = false;
+    if (aLen >= 2) {
+      switch (aBuf[0]) {
+      case '\xEF':
+        if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
+          // EF BB BF  UTF-8 encoded BOM
+          mDetectedCharset = "UTF-8";
+        }
         break;
-        case '\xFE':
-          if ('\xFF' == aBuf[1])
-            // FE FF  UTF-16, big endian BOM
-            mDetectedCharset = "UTF-16";
+      case '\xFE':
+        if ('\xFF' == aBuf[1]) {
+          // FE FF  UTF-16, big endian BOM
+          mDetectedCharset = "UTF-16BE";
+        }
         break;
-        case '\xFF':
-          if ('\xFE' == aBuf[1])
-            // FF FE  UTF-16, little endian BOM
-            mDetectedCharset = "UTF-16";
+      case '\xFF':
+        if ('\xFE' == aBuf[1]) {
+          // FF FE  UTF-16, little endian BOM
+          mDetectedCharset = "UTF-16LE";
+        }
         break;
       }  // switch
+    }
 
-      if (mDetectedCharset)
-      {
-        mDone = PR_TRUE;
-        return NS_OK;
-      }
+    if (mDetectedCharset)
+    {
+      mDone = PR_TRUE;
+      return NS_OK;
+    }
   }
   
   PRUint32 i;