diff --git a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp index 8993aae..3e34ded 100644 --- a/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp +++ b/src/ext/libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp @@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) //If the data starts with BOM, we know it is UTF if (mStart) { - mStart = PR_FALSE; - if (aLen > 2) - switch (aBuf[0]) - { - case '\xEF': - if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) - // EF BB BF UTF-8 encoded BOM - mDetectedCharset = "UTF-8"; + mStart = false; + if (aLen >= 2) { + switch (aBuf[0]) { + case '\xEF': + if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) { + // EF BB BF UTF-8 encoded BOM + mDetectedCharset = "UTF-8"; + } break; - case '\xFE': - if ('\xFF' == aBuf[1]) - // FE FF UTF-16, big endian BOM - mDetectedCharset = "UTF-16"; + case '\xFE': + if ('\xFF' == aBuf[1]) { + // FE FF UTF-16, big endian BOM + mDetectedCharset = "UTF-16BE"; + } break; - case '\xFF': - if ('\xFE' == aBuf[1]) - // FF FE UTF-16, little endian BOM - mDetectedCharset = "UTF-16"; + case '\xFF': + if ('\xFE' == aBuf[1]) { + // FF FE UTF-16, little endian BOM + mDetectedCharset = "UTF-16LE"; + } break; } // switch + } - if (mDetectedCharset) - { - mDone = PR_TRUE; - return NS_OK; - } + if (mDetectedCharset) + { + mDone = PR_TRUE; + return NS_OK; + } } PRUint32 i;