From 9c417c0755c66b45d41f2916b169247199710a23 Mon Sep 17 00:00:00 2001 From: PyYoshi Date: Thu, 3 Apr 2014 12:02:52 +0900 Subject: [PATCH] Fix nsUniversalDetector overlooking the UTF-16 BOM. --- .../src/base/nsUniversalDetector.cpp | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp index 8993aae..3e34ded 100644 --- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp +++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp @@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) //If the data starts with BOM, we know it is UTF if (mStart) { - mStart = PR_FALSE; - if (aLen > 2) - switch (aBuf[0]) - { - case '\xEF': - if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) - // EF BB BF UTF-8 encoded BOM - mDetectedCharset = "UTF-8"; + mStart = false; + if (aLen >= 2) { + switch (aBuf[0]) { + case '\xEF': + if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) { + // EF BB BF UTF-8 encoded BOM + mDetectedCharset = "UTF-8"; + } break; - case '\xFE': - if ('\xFF' == aBuf[1]) - // FE FF UTF-16, big endian BOM - mDetectedCharset = "UTF-16"; + case '\xFE': + if ('\xFF' == aBuf[1]) { + // FE FF UTF-16, big endian BOM + mDetectedCharset = "UTF-16BE"; + } break; - case '\xFF': - if ('\xFE' == aBuf[1]) - // FF FE UTF-16, little endian BOM - mDetectedCharset = "UTF-16"; + case '\xFF': + if ('\xFE' == aBuf[1]) { + // FF FE UTF-16, little endian BOM + mDetectedCharset = "UTF-16LE"; + } break; } // switch + } - if (mDetectedCharset) - { - mDone = PR_TRUE; - return NS_OK; - } + if (mDetectedCharset) + { + mDone = PR_TRUE; + return NS_OK; + } } PRUint32 i;