Fix nsUniversalDetector overlooking the UTF-16 BOM.
This commit is contained in:
parent
ea9a21be1e
commit
9c417c0755
1 changed file with 24 additions and 21 deletions
|
@ -111,32 +111,35 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||
//If the data starts with BOM, we know it is UTF
|
||||
if (mStart)
|
||||
{
|
||||
mStart = PR_FALSE;
|
||||
if (aLen > 2)
|
||||
switch (aBuf[0])
|
||||
{
|
||||
case '\xEF':
|
||||
if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
|
||||
// EF BB BF UTF-8 encoded BOM
|
||||
mDetectedCharset = "UTF-8";
|
||||
mStart = false;
|
||||
if (aLen >= 2) {
|
||||
switch (aBuf[0]) {
|
||||
case '\xEF':
|
||||
if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
|
||||
// EF BB BF UTF-8 encoded BOM
|
||||
mDetectedCharset = "UTF-8";
|
||||
}
|
||||
break;
|
||||
case '\xFE':
|
||||
if ('\xFF' == aBuf[1])
|
||||
// FE FF UTF-16, big endian BOM
|
||||
mDetectedCharset = "UTF-16";
|
||||
case '\xFE':
|
||||
if ('\xFF' == aBuf[1]) {
|
||||
// FE FF UTF-16, big endian BOM
|
||||
mDetectedCharset = "UTF-16BE";
|
||||
}
|
||||
break;
|
||||
case '\xFF':
|
||||
if ('\xFE' == aBuf[1])
|
||||
// FF FE UTF-16, little endian BOM
|
||||
mDetectedCharset = "UTF-16";
|
||||
case '\xFF':
|
||||
if ('\xFE' == aBuf[1]) {
|
||||
// FF FE UTF-16, little endian BOM
|
||||
mDetectedCharset = "UTF-16LE";
|
||||
}
|
||||
break;
|
||||
} // switch
|
||||
}
|
||||
|
||||
if (mDetectedCharset)
|
||||
{
|
||||
mDone = PR_TRUE;
|
||||
return NS_OK;
|
||||
}
|
||||
if (mDetectedCharset)
|
||||
{
|
||||
mDone = PR_TRUE;
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
PRUint32 i;
|
||||
|
|
Loading…
Reference in a new issue