forked from enviPath/enviPy
427 lines
14 KiB
JavaScript
427 lines
14 KiB
JavaScript
/****************************************************************************
|
|
* Copyright 2017 EPAM Systems
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
***************************************************************************/
|
|
|
|
var Vec2 = require('../../util/vec2');
|
|
var Map = require('../../util/map');
|
|
|
|
var element = require('./../element');
|
|
var Struct = require('./../struct/index');
|
|
|
|
var sGroup = require('./parseSGroup');
|
|
var utils = require('./utils');
|
|
|
|
var loadRGroupFragments = true; // TODO: set to load the fragments
|
|
|
|
function parseAtomLine(atomLine) {
|
|
/* reader */
|
|
var atomSplit = utils.partitionLine(atomLine, utils.fmtInfo.atomLinePartition);
|
|
var params =
|
|
{
|
|
// generic
|
|
pp: new Vec2(parseFloat(atomSplit[0]), -parseFloat(atomSplit[1]), parseFloat(atomSplit[2])),
|
|
label: atomSplit[4].trim(),
|
|
explicitValence: utils.fmtInfo.valenceMap[utils.parseDecimalInt(atomSplit[10])],
|
|
|
|
// obsolete
|
|
massDifference: utils.parseDecimalInt(atomSplit[5]),
|
|
charge: utils.fmtInfo.chargeMap[utils.parseDecimalInt(atomSplit[6])],
|
|
|
|
// query
|
|
hCount: utils.parseDecimalInt(utils.parseDecimalInt(atomSplit[8])),
|
|
stereoCare: utils.parseDecimalInt(atomSplit[9]) != 0,
|
|
|
|
// reaction
|
|
aam: utils.parseDecimalInt(atomSplit[14]),
|
|
invRet: utils.parseDecimalInt(atomSplit[15]),
|
|
|
|
// reaction query
|
|
exactChangeFlag: utils.parseDecimalInt(atomSplit[16]) != 0
|
|
};
|
|
return new Struct.Atom(params);
|
|
}
|
|
|
|
function parseBondLine(bondLine) {
|
|
/* reader */
|
|
var bondSplit = utils.partitionLine(bondLine, utils.fmtInfo.bondLinePartition);
|
|
var params =
|
|
{
|
|
begin: utils.parseDecimalInt(bondSplit[0]) - 1,
|
|
end: utils.parseDecimalInt(bondSplit[1]) - 1,
|
|
type: utils.fmtInfo.bondTypeMap[utils.parseDecimalInt(bondSplit[2])],
|
|
stereo: utils.fmtInfo.bondStereoMap[utils.parseDecimalInt(bondSplit[3])],
|
|
xxx: bondSplit[4],
|
|
topology: utils.fmtInfo.bondTopologyMap[utils.parseDecimalInt(bondSplit[5])],
|
|
reactingCenterStatus: utils.parseDecimalInt(bondSplit[6])
|
|
};
|
|
|
|
return new Struct.Bond(params);
|
|
}
|
|
|
|
function parseAtomListLine(/* string */atomListLine) {
|
|
/* reader */
|
|
var split = utils.partitionLine(atomListLine, utils.fmtInfo.atomListHeaderPartition);
|
|
|
|
var number = utils.parseDecimalInt(split[0]) - 1;
|
|
var notList = (split[2].trim() == 'T');
|
|
var count = utils.parseDecimalInt(split[4].trim());
|
|
|
|
var ids = atomListLine.slice(utils.fmtInfo.atomListHeaderLength);
|
|
var list = [];
|
|
var itemLength = utils.fmtInfo.atomListHeaderItemLength;
|
|
for (var i = 0; i < count; ++i)
|
|
list[i] = utils.parseDecimalInt(ids.slice(i * itemLength, ((i + 1) * itemLength) - 1));
|
|
|
|
return {
|
|
aid: number,
|
|
atomList: new Struct.AtomList({
|
|
notList: notList,
|
|
ids: list
|
|
})
|
|
};
|
|
}
|
|
|
|
function parsePropertyLines(ctab, ctabLines, shift, end, sGroups, rLogic) { // eslint-disable-line max-statements, max-params
|
|
/* reader */
|
|
var props = new Map();
|
|
while (shift < end) {
|
|
var line = ctabLines[shift];
|
|
if (line.charAt(0) == 'A') {
|
|
var propValue = ctabLines[++shift];
|
|
var isPseudo = /'.+'/.test(propValue);
|
|
if (isPseudo && !props.get('pseudo'))
|
|
props.set('pseudo', new Map());
|
|
if (!isPseudo && !props.get('alias'))
|
|
props.set('alias', new Map());
|
|
if (isPseudo) propValue = propValue.replace(/'/g, '');
|
|
props.get(isPseudo ? 'pseudo' : 'alias').set(utils.parseDecimalInt(line.slice(3, 6)) - 1, propValue);
|
|
} else if (line.charAt(0) == 'M') {
|
|
var type = line.slice(3, 6);
|
|
var propertyData = line.slice(6);
|
|
if (type == 'END') {
|
|
break;
|
|
} else if (type == 'CHG') {
|
|
if (!props.get('charge'))
|
|
props.set('charge', new Map());
|
|
props.get('charge').update(sGroup.readKeyValuePairs(propertyData));
|
|
} else if (type == 'RAD') {
|
|
if (!props.get('radical'))
|
|
props.set('radical', new Map());
|
|
props.get('radical').update(sGroup.readKeyValuePairs(propertyData));
|
|
} else if (type == 'ISO') {
|
|
if (!props.get('isotope'))
|
|
props.set('isotope', new Map());
|
|
props.get('isotope').update(sGroup.readKeyValuePairs(propertyData));
|
|
} else if (type == 'RBC') {
|
|
if (!props.get('ringBondCount'))
|
|
props.set('ringBondCount', new Map());
|
|
props.get('ringBondCount').update(sGroup.readKeyValuePairs(propertyData));
|
|
} else if (type == 'SUB') {
|
|
if (!props.get('substitutionCount'))
|
|
props.set('substitutionCount', new Map());
|
|
props.get('substitutionCount').update(sGroup.readKeyValuePairs(propertyData));
|
|
} else if (type == 'UNS') {
|
|
if (!props.get('unsaturatedAtom'))
|
|
props.set('unsaturatedAtom', new Map());
|
|
props.get('unsaturatedAtom').update(sGroup.readKeyValuePairs(propertyData));
|
|
// else if (type == "LIN") // link atom
|
|
} else if (type == 'RGP') { // rgroup atom
|
|
if (!props.get('rglabel'))
|
|
props.set('rglabel', new Map());
|
|
var rglabels = props.get('rglabel');
|
|
var a2rs = sGroup.readKeyMultiValuePairs(propertyData);
|
|
for (var a2ri = 0; a2ri < a2rs.length; a2ri++) {
|
|
var a2r = a2rs[a2ri];
|
|
rglabels.set(a2r[0], (rglabels.get(a2r[0]) || 0) | (1 << (a2r[1] - 1)));
|
|
}
|
|
} else if (type == 'LOG') { // rgroup atom
|
|
propertyData = propertyData.slice(4);
|
|
var rgid = utils.parseDecimalInt(propertyData.slice(0, 3).trim());
|
|
var iii = utils.parseDecimalInt(propertyData.slice(4, 7).trim());
|
|
var hhh = utils.parseDecimalInt(propertyData.slice(8, 11).trim());
|
|
var ooo = propertyData.slice(12).trim();
|
|
var logic = {};
|
|
if (iii > 0)
|
|
logic.ifthen = iii;
|
|
logic.resth = hhh == 1;
|
|
logic.range = ooo;
|
|
rLogic[rgid] = logic;
|
|
} else if (type == 'APO') {
|
|
if (!props.get('attpnt'))
|
|
props.set('attpnt', new Map());
|
|
props.get('attpnt').update(sGroup.readKeyValuePairs(propertyData));
|
|
} else if (type == 'ALS') { // atom list
|
|
if (!props.get('atomList'))
|
|
props.set('atomList', new Map());
|
|
var list = parsePropertyLineAtomList(
|
|
utils.partitionLine(propertyData, [1, 3, 3, 1, 1, 1]),
|
|
utils.partitionLineFixed(propertyData.slice(10), 4, false));
|
|
props.get('atomList').update(
|
|
list);
|
|
if (!props.get('label'))
|
|
props.set('label', new Map());
|
|
for (var aid in list) props.get('label').set(aid, 'L#');
|
|
} else if (type == 'STY') { // introduce s-group
|
|
sGroup.initSGroup(sGroups, propertyData);
|
|
} else if (type == 'SST') {
|
|
sGroup.applySGroupProp(sGroups, 'subtype', propertyData);
|
|
} else if (type == 'SLB') {
|
|
sGroup.applySGroupProp(sGroups, 'label', propertyData, true);
|
|
} else if (type == 'SPL') {
|
|
sGroup.applySGroupProp(sGroups, 'parent', propertyData, true, true);
|
|
} else if (type == 'SCN') {
|
|
sGroup.applySGroupProp(sGroups, 'connectivity', propertyData);
|
|
} else if (type == 'SAL') {
|
|
sGroup.applySGroupArrayProp(sGroups, 'atoms', propertyData, -1);
|
|
} else if (type == 'SBL') {
|
|
sGroup.applySGroupArrayProp(sGroups, 'bonds', propertyData, -1);
|
|
} else if (type == 'SPA') {
|
|
sGroup.applySGroupArrayProp(sGroups, 'patoms', propertyData, -1);
|
|
} else if (type == 'SMT') {
|
|
var sid = utils.parseDecimalInt(propertyData.slice(0, 4)) - 1;
|
|
sGroups[sid].data.subscript = propertyData.slice(4).trim();
|
|
} else if (type == 'SDT') {
|
|
sGroup.applyDataSGroupDesc(sGroups, propertyData);
|
|
} else if (type == 'SDD') {
|
|
sGroup.applyDataSGroupInfoLine(sGroups, propertyData);
|
|
} else if (type == 'SCD') {
|
|
sGroup.applyDataSGroupDataLine(sGroups, propertyData, false);
|
|
} else if (type == 'SED') {
|
|
sGroup.applyDataSGroupDataLine(sGroups, propertyData, true);
|
|
}
|
|
}
|
|
++shift;
|
|
}
|
|
return props;
|
|
}
|
|
|
|
function applyAtomProp(atoms /* Pool */, values /* Map */, propId /* string */) {
|
|
/* reader */
|
|
values.each(function (aid, propVal) {
|
|
atoms.get(aid)[propId] = propVal;
|
|
});
|
|
}
|
|
|
|
function parseCTabV2000(ctabLines, countsSplit) { // eslint-disable-line max-statements
|
|
/* reader */
|
|
var ctab = new Struct();
|
|
var i;
|
|
var atomCount = utils.parseDecimalInt(countsSplit[0]);
|
|
var bondCount = utils.parseDecimalInt(countsSplit[1]);
|
|
var atomListCount = utils.parseDecimalInt(countsSplit[2]);
|
|
ctab.isChiral = utils.parseDecimalInt(countsSplit[4]) != 0;
|
|
var stextLinesCount = utils.parseDecimalInt(countsSplit[5]);
|
|
var propertyLinesCount = utils.parseDecimalInt(countsSplit[10]);
|
|
|
|
var shift = 0;
|
|
var atomLines = ctabLines.slice(shift, shift + atomCount);
|
|
shift += atomCount;
|
|
var bondLines = ctabLines.slice(shift, shift + bondCount);
|
|
shift += bondCount;
|
|
var atomListLines = ctabLines.slice(shift, shift + atomListCount);
|
|
shift += atomListCount + stextLinesCount;
|
|
|
|
var atoms = atomLines.map(parseAtomLine);
|
|
for (i = 0; i < atoms.length; ++i)
|
|
ctab.atoms.add(atoms[i]);
|
|
var bonds = bondLines.map(parseBondLine);
|
|
for (i = 0; i < bonds.length; ++i)
|
|
ctab.bonds.add(bonds[i]);
|
|
|
|
var atomLists = atomListLines.map(parseAtomListLine);
|
|
atomLists.forEach(function (pair) {
|
|
ctab.atoms.get(pair.aid).atomList = pair.atomList;
|
|
ctab.atoms.get(pair.aid).label = 'L#';
|
|
});
|
|
|
|
var sGroups = {};
|
|
var rLogic = {};
|
|
var props = parsePropertyLines(ctab, ctabLines, shift,
|
|
Math.min(ctabLines.length, shift + propertyLinesCount), sGroups, rLogic);
|
|
props.each(function (propId, values) {
|
|
applyAtomProp(ctab.atoms, values, propId);
|
|
});
|
|
|
|
var atomMap = {};
|
|
var sid;
|
|
for (sid in sGroups) {
|
|
var sg = sGroups[sid];
|
|
if (sg.type === 'DAT' && sg.atoms.length === 0) {
|
|
var parent = sGroups[sid].parent;
|
|
if (parent >= 0) {
|
|
var psg = sGroups[parent - 1];
|
|
if (psg.type === 'GEN')
|
|
sg.atoms = [].slice.call(psg.atoms);
|
|
}
|
|
}
|
|
}
|
|
for (sid in sGroups)
|
|
sGroup.loadSGroup(ctab, sGroups[sid], atomMap);
|
|
var emptyGroups = [];
|
|
for (sid in sGroups) { // TODO: why do we need that?
|
|
Struct.SGroup.filter(ctab, sGroups[sid], atomMap);
|
|
if (sGroups[sid].atoms.length == 0 && !sGroups[sid].allAtoms)
|
|
emptyGroups.push(sid);
|
|
}
|
|
for (i = 0; i < emptyGroups.length; ++i) {
|
|
ctab.sGroupForest.remove(emptyGroups[i]);
|
|
ctab.sgroups.remove(emptyGroups[i]);
|
|
}
|
|
for (var rgid in rLogic)
|
|
ctab.rgroups.set(rgid, new Struct.RGroup(rLogic[rgid]));
|
|
return ctab;
|
|
}
|
|
|
|
function parseRg2000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
|
|
ctabLines = ctabLines.slice(7);
|
|
if (ctabLines[0].trim() != '$CTAB')
|
|
throw new Error('RGFile format invalid');
|
|
var i = 1;
|
|
while (ctabLines[i].charAt(0) != '$') i++;
|
|
if (ctabLines[i].trim() != '$END CTAB')
|
|
throw new Error('RGFile format invalid');
|
|
var coreLines = ctabLines.slice(1, i);
|
|
ctabLines = ctabLines.slice(i + 1);
|
|
var fragmentLines = {};
|
|
while (true) { // eslint-disable-line no-constant-condition
|
|
if (ctabLines.length == 0)
|
|
throw new Error('Unexpected end of file');
|
|
var line = ctabLines[0].trim();
|
|
if (line == '$END MOL') {
|
|
ctabLines = ctabLines.slice(1);
|
|
break;
|
|
}
|
|
if (line != '$RGP')
|
|
throw new Error('RGFile format invalid');
|
|
var rgid = ctabLines[1].trim() - 0;
|
|
fragmentLines[rgid] = [];
|
|
ctabLines = ctabLines.slice(2);
|
|
while (true) { // eslint-disable-line no-constant-condition
|
|
if (ctabLines.length == 0)
|
|
throw new Error('Unexpected end of file');
|
|
line = ctabLines[0].trim();
|
|
if (line == '$END RGP') {
|
|
ctabLines = ctabLines.slice(1);
|
|
break;
|
|
}
|
|
if (line != '$CTAB')
|
|
throw new Error('RGFile format invalid');
|
|
i = 1;
|
|
while (ctabLines[i].charAt(0) != '$') i++;
|
|
if (ctabLines[i].trim() != '$END CTAB')
|
|
throw new Error('RGFile format invalid');
|
|
fragmentLines[rgid].push(ctabLines.slice(1, i));
|
|
ctabLines = ctabLines.slice(i + 1);
|
|
}
|
|
}
|
|
|
|
var core = parseCTab(coreLines);
|
|
var frag = {};
|
|
if (loadRGroupFragments) {
|
|
for (var id in fragmentLines) {
|
|
frag[id] = [];
|
|
for (var j = 0; j < fragmentLines[id].length; ++j)
|
|
frag[id].push(parseCTab(fragmentLines[id][j]));
|
|
}
|
|
}
|
|
return rgMerge(core, frag);
|
|
}
|
|
|
|
function parseRxn2000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
|
|
/* reader */
|
|
ctabLines = ctabLines.slice(4);
|
|
var countsSplit = utils.partitionLine(ctabLines[0], utils.fmtInfo.rxnItemsPartition);
|
|
var nReactants = countsSplit[0] - 0,
|
|
nProducts = countsSplit[1] - 0,
|
|
nAgents = countsSplit[2] - 0;
|
|
ctabLines = ctabLines.slice(1); // consume counts line
|
|
|
|
var mols = [];
|
|
while (ctabLines.length > 0 && ctabLines[0].substr(0, 4) == '$MOL') {
|
|
ctabLines = ctabLines.slice(1);
|
|
var n = 0;
|
|
while (n < ctabLines.length && ctabLines[n].substr(0, 4) != '$MOL') n++;
|
|
|
|
var lines = ctabLines.slice(0, n);
|
|
var struct;
|
|
if (lines[0].search('\\$MDL') == 0) {
|
|
struct = parseRg2000(lines);
|
|
} else {
|
|
struct = parseCTab(lines.slice(3));
|
|
struct.name = lines[0].trim();
|
|
}
|
|
mols.push(struct);
|
|
ctabLines = ctabLines.slice(n);
|
|
}
|
|
return utils.rxnMerge(mols, nReactants, nProducts, nAgents);
|
|
}
|
|
|
|
function parseCTab(/* string */ ctabLines) /* Struct */ {
|
|
/* reader */
|
|
var countsSplit = utils.partitionLine(ctabLines[0], utils.fmtInfo.countsLinePartition);
|
|
ctabLines = ctabLines.slice(1);
|
|
return parseCTabV2000(ctabLines, countsSplit);
|
|
}
|
|
|
|
function rgMerge(scaffold, rgroups) /* Struct */ {
|
|
/* reader */
|
|
var ret = new Struct();
|
|
|
|
scaffold.mergeInto(ret, null, null, false, true);
|
|
for (var rgid in rgroups) {
|
|
for (var j = 0; j < rgroups[rgid].length; ++j) {
|
|
var ctab = rgroups[rgid][j];
|
|
ctab.rgroups.set(rgid, new Struct.RGroup());
|
|
var frag = {};
|
|
var frid = ctab.frags.add(frag);
|
|
ctab.rgroups.get(rgid).frags.add(frid);
|
|
ctab.atoms.each(function (aid, atom) {
|
|
atom.fragment = frid;
|
|
});
|
|
ctab.mergeInto(ret);
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
function labelsListToIds(labels) {
|
|
/* reader */
|
|
var ids = [];
|
|
for (var i = 0; i < labels.length; ++i)
|
|
ids.push(element.map[labels[i].trim()]);
|
|
return ids;
|
|
}
|
|
|
|
function parsePropertyLineAtomList(hdr, lst) {
|
|
/* reader */
|
|
var aid = utils.parseDecimalInt(hdr[1]) - 1;
|
|
var count = utils.parseDecimalInt(hdr[2]);
|
|
var notList = hdr[4].trim() == 'T';
|
|
var ids = labelsListToIds(lst.slice(0, count));
|
|
var ret = {};
|
|
ret[aid] = new Struct.AtomList({
|
|
notList: notList,
|
|
ids: ids
|
|
});
|
|
return ret;
|
|
}
|
|
|
|
module.exports = {
|
|
parseCTabV2000: parseCTabV2000,
|
|
parseRg2000: parseRg2000,
|
|
parseRxn2000: parseRxn2000
|
|
};
|