Fix nsUniversalDetector overlooking the UTF-16 BOM.
This commit is contained in:
parent
ea9a21be1e
commit
9c417c0755
1 changed file with 24 additions and 21 deletions
|
@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||||
//If the data starts with BOM, we know it is UTF
|
//If the data starts with BOM, we know it is UTF
|
||||||
if (mStart)
|
if (mStart)
|
||||||
{
|
{
|
||||||
mStart = PR_FALSE;
|
mStart = false;
|
||||||
if (aLen > 2)
|
if (aLen >= 2) {
|
||||||
switch (aBuf[0])
|
switch (aBuf[0]) {
|
||||||
{
|
case '\xEF':
|
||||||
case '\xEF':
|
if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
|
||||||
if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
|
// EF BB BF UTF-8 encoded BOM
|
||||||
// EF BB BF UTF-8 encoded BOM
|
mDetectedCharset = "UTF-8";
|
||||||
mDetectedCharset = "UTF-8";
|
}
|
||||||
break;
|
break;
|
||||||
case '\xFE':
|
case '\xFE':
|
||||||
if ('\xFF' == aBuf[1])
|
if ('\xFF' == aBuf[1]) {
|
||||||
// FE FF UTF-16, big endian BOM
|
// FE FF UTF-16, big endian BOM
|
||||||
mDetectedCharset = "UTF-16";
|
mDetectedCharset = "UTF-16BE";
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case '\xFF':
|
case '\xFF':
|
||||||
if ('\xFE' == aBuf[1])
|
if ('\xFE' == aBuf[1]) {
|
||||||
// FF FE UTF-16, little endian BOM
|
// FF FE UTF-16, little endian BOM
|
||||||
mDetectedCharset = "UTF-16";
|
mDetectedCharset = "UTF-16LE";
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
} // switch
|
} // switch
|
||||||
|
}
|
||||||
|
|
||||||
if (mDetectedCharset)
|
if (mDetectedCharset)
|
||||||
{
|
{
|
||||||
mDone = PR_TRUE;
|
mDone = PR_TRUE;
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PRUint32 i;
|
PRUint32 i;
|
||||||
|
|
Loading…
Reference in a new issue