Fix nsUniversalDetector overlooking the UTF-16 BOM.

This commit is contained in:
PyYoshi 2014-04-03 12:02:52 +09:00
parent ea9a21be1e
commit 9c417c0755

View file

@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
//If the data starts with BOM, we know it is UTF //If the data starts with BOM, we know it is UTF
if (mStart) if (mStart)
{ {
mStart = PR_FALSE; mStart = false;
if (aLen > 2) if (aLen >= 2) {
switch (aBuf[0]) switch (aBuf[0]) {
{ case '\xEF':
case '\xEF': if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) // EF BB BF UTF-8 encoded BOM
// EF BB BF UTF-8 encoded BOM mDetectedCharset = "UTF-8";
mDetectedCharset = "UTF-8"; }
break; break;
case '\xFE': case '\xFE':
if ('\xFF' == aBuf[1]) if ('\xFF' == aBuf[1]) {
// FE FF UTF-16, big endian BOM // FE FF UTF-16, big endian BOM
mDetectedCharset = "UTF-16"; mDetectedCharset = "UTF-16BE";
}
break; break;
case '\xFF': case '\xFF':
if ('\xFE' == aBuf[1]) if ('\xFE' == aBuf[1]) {
// FF FE UTF-16, little endian BOM // FF FE UTF-16, little endian BOM
mDetectedCharset = "UTF-16"; mDetectedCharset = "UTF-16LE";
}
break; break;
} // switch } // switch
}
if (mDetectedCharset) if (mDetectedCharset)
{ {
mDone = PR_TRUE; mDone = PR_TRUE;
return NS_OK; return NS_OK;
} }
} }
PRUint32 i; PRUint32 i;