forked from enviPath/enviPy
Current Dev State
This commit is contained in:
210
static/js/ketcher2/node_modules/jschardet/src/universaldetector.js
generated
vendored
Executable file
210
static/js/ketcher2/node_modules/jschardet/src/universaldetector.js
generated
vendored
Executable file
@ -0,0 +1,210 @@
|
||||
/*
 * The Original Code is Mozilla Universal charset detector code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   António Afonso (antonio.afonso gmail.com) - port to JavaScript
 *   Mark Pilgrim - port to Python
 *   Shy Shalom - original C code
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301  USA
 */

/**
 * This is a port from the python port, version "2.0.1"
 */
|
||||
|
||||
!function(jschardet) {
|
||||
|
||||
jschardet.UniversalDetector = function() {
|
||||
var MINIMUM_THRESHOLD = jschardet.Constants.MINIMUM_THRESHOLD;
|
||||
var _state = {
|
||||
pureAscii : 0,
|
||||
escAscii : 1,
|
||||
highbyte : 2
|
||||
};
|
||||
var self = this;
|
||||
|
||||
function init() {
|
||||
self._highBitDetector = /[\x80-\xFF]/;
|
||||
self._escDetector = /(\x1B|~\{)/;
|
||||
self._mEscCharsetProber = null;
|
||||
self._mCharsetProbers = [];
|
||||
self.reset();
|
||||
}
|
||||
|
||||
this.reset = function() {
|
||||
this.result = {"encoding": null, "confidence": 0.0};
|
||||
this.done = false;
|
||||
this._mStart = true;
|
||||
this._mGotData = false;
|
||||
this._mInputState = _state.pureAscii;
|
||||
this._mLastChar = "";
|
||||
this._mBOM = "";
|
||||
if( this._mEscCharsetProber ) {
|
||||
this._mEscCharsetProber.reset();
|
||||
}
|
||||
for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
|
||||
prober.reset();
|
||||
}
|
||||
}
|
||||
|
||||
this.feed = function(aBuf) {
|
||||
if( this.done ) return;
|
||||
|
||||
var aLen = aBuf.length;
|
||||
if( !aLen ) return;
|
||||
|
||||
if( !this._mGotData ) {
|
||||
this._mBOM += aBuf;
|
||||
// If the data starts with BOM, we know it is UTF
|
||||
if( this._mBOM.slice(0,3) == "\xEF\xBB\xBF" ) {
|
||||
// EF BB BF UTF-8 with BOM
|
||||
this.result = {"encoding": "UTF-8", "confidence": 1.0};
|
||||
} else if( this._mBOM.slice(0,4) == "\xFF\xFE\x00\x00" ) {
|
||||
// FF FE 00 00 UTF-32, little-endian BOM
|
||||
this.result = {"encoding": "UTF-32LE", "confidence": 1.0};
|
||||
} else if( this._mBOM.slice(0,4) == "\x00\x00\xFE\xFF" ) {
|
||||
// 00 00 FE FF UTF-32, big-endian BOM
|
||||
this.result = {"encoding": "UTF-32BE", "confidence": 1.0};
|
||||
} else if( this._mBOM.slice(0,4) == "\xFE\xFF\x00\x00" ) {
|
||||
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
this.result = {"encoding": "X-ISO-10646-UCS-4-3412", "confidence": 1.0};
|
||||
} else if( this._mBOM.slice(0,4) == "\x00\x00\xFF\xFE" ) {
|
||||
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
this.result = {"encoding": "X-ISO-10646-UCS-4-2143", "confidence": 1.0};
|
||||
} else if( this._mBOM.slice(0,2) == "\xFF\xFE" ) {
|
||||
// FF FE UTF-16, little endian BOM
|
||||
this.result = {"encoding": "UTF-16LE", "confidence": 1.0};
|
||||
} else if( this._mBOM.slice(0,2) == "\xFE\xFF" ) {
|
||||
// FE FF UTF-16, big endian BOM
|
||||
this.result = {"encoding": "UTF-16BE", "confidence": 1.0};
|
||||
}
|
||||
|
||||
// If we got to 4 chars without being able to detect a BOM we
|
||||
// stop trying.
|
||||
if( this._mBOM.length > 3 ) {
|
||||
this._mGotData = true;
|
||||
}
|
||||
}
|
||||
|
||||
if( this.result.encoding && (this.result.confidence > 0.0) ) {
|
||||
this.done = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if( this._mInputState == _state.pureAscii ) {
|
||||
if( this._highBitDetector.test(aBuf) ) {
|
||||
this._mInputState = _state.highbyte;
|
||||
} else if( this._escDetector.test(this._mLastChar + aBuf) ) {
|
||||
this._mInputState = _state.escAscii;
|
||||
}
|
||||
}
|
||||
|
||||
this._mLastChar = aBuf.slice(-1);
|
||||
|
||||
if( this._mInputState == _state.escAscii ) {
|
||||
if( !this._mEscCharsetProber ) {
|
||||
this._mEscCharsetProber = new jschardet.EscCharSetProber();
|
||||
}
|
||||
if( this._mEscCharsetProber.feed(aBuf) == jschardet.Constants.foundIt ) {
|
||||
this.result = {
|
||||
"encoding": this._mEscCharsetProber.getCharsetName(),
|
||||
"confidence": this._mEscCharsetProber.getConfidence()
|
||||
};
|
||||
this.done = true;
|
||||
}
|
||||
} else if( this._mInputState == _state.highbyte ) {
|
||||
if( this._mCharsetProbers.length == 0 ) {
|
||||
this._mCharsetProbers = [
|
||||
new jschardet.MBCSGroupProber(),
|
||||
new jschardet.SBCSGroupProber(),
|
||||
new jschardet.Latin1Prober()
|
||||
];
|
||||
}
|
||||
for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
|
||||
if( prober.feed(aBuf) == jschardet.Constants.foundIt ) {
|
||||
this.result = {
|
||||
"encoding": prober.getCharsetName(),
|
||||
"confidence": prober.getConfidence()
|
||||
};
|
||||
this.done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.close = function() {
|
||||
if( this.done ) return;
|
||||
if( this._mBOM.length === 0 ) {
|
||||
if( jschardet.Constants._debug ) {
|
||||
jschardet.log("no data received!\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
this.done = true;
|
||||
|
||||
if( this._mInputState == _state.pureAscii ) {
|
||||
if( jschardet.Constants._debug ) {
|
||||
jschardet.log("pure ascii")
|
||||
}
|
||||
this.result = {"encoding": "ascii", "confidence": 1.0};
|
||||
return this.result;
|
||||
}
|
||||
|
||||
if( this._mInputState == _state.highbyte ) {
|
||||
var proberConfidence = null;
|
||||
var maxProberConfidence = 0.0;
|
||||
var maxProber = null;
|
||||
for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
|
||||
if( !prober ) continue;
|
||||
proberConfidence = prober.getConfidence();
|
||||
if( proberConfidence > maxProberConfidence ) {
|
||||
maxProberConfidence = proberConfidence;
|
||||
maxProber = prober;
|
||||
}
|
||||
if( jschardet.Constants._debug ) {
|
||||
jschardet.log(prober.getCharsetName() + " confidence " + prober.getConfidence());
|
||||
}
|
||||
}
|
||||
if( maxProber && maxProberConfidence > MINIMUM_THRESHOLD ) {
|
||||
this.result = {
|
||||
"encoding": maxProber.getCharsetName(),
|
||||
"confidence": maxProber.getConfidence()
|
||||
};
|
||||
return this.result;
|
||||
}
|
||||
}
|
||||
|
||||
if( jschardet.Constants._debug ) {
|
||||
jschardet.log("no probers hit minimum threshhold\n");
|
||||
for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
|
||||
if( !prober ) continue;
|
||||
jschardet.log(prober.getCharsetName() + " confidence = " +
|
||||
prober.getConfidence() + "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
init();
|
||||
}
|
||||
|
||||
}(require('./init'));
|
||||
Reference in New Issue
Block a user