forked from enviPath/enviPy
483 lines
14 KiB
JavaScript
483 lines
14 KiB
JavaScript
/****************************************************************************
|
|
* Copyright 2017 EPAM Systems
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
***************************************************************************/
|
|
|
|
var Vec2 = require('../../util/vec2');
|
|
|
|
var element = require('./../element');
|
|
var Struct = require('./../struct/index');
|
|
|
|
var sGroup = require('./parseSGroup');
|
|
var utils = require('./utils');
|
|
|
|
/**
 * Builds a Struct.Atom from one logical line of a V3000 atom block.
 *
 * The line is tokenized with spaceparsplit. Token [1] is the atom label (or
 * an atom list), tokens [2..4] are the x/y/z coordinates, token [5] is stored
 * as `aam`. Every token from index 6 on is treated as a KEY=VALUE property.
 *
 * @param {string} line atom line to parse (callers in this file pass it with
 *     the V30 prefix already removed)
 * @returns {Struct.Atom} atom created from the collected parameters
 * @throws {Error} if the label ends with ']' but starts with neither '['
 *     nor 'NOT ['
 */
function parseAtomLineV3000(line) { // eslint-disable-line max-statements
    /* reader */
    var tokens = spaceparsplit(line);
    var params = {
        // note: the y coordinate is negated while reading
        pp: new Vec2(parseFloat(tokens[2]), -parseFloat(tokens[3]), parseFloat(tokens[4])),
        aam: tokens[5].trim()
    };

    var label = tokens[1].trim();
    var isQuoted = label.charAt(0) === '"' && label.charAt(label.length - 1) === '"';
    if (isQuoted)
        label = label.slice(1, -1); // strip quotation marks

    if (label.charAt(label.length - 1) !== ']') {
        params.label = label;
    } else {
        // a trailing ']' means an atom list: '[A,B,...]' or 'NOT [A,B,...]'
        var listBody = label.slice(0, -1); // remove ']'
        var negated = listBody.slice(0, 5) === 'NOT [';
        if (negated) {
            listBody = listBody.slice(5); // remove 'NOT ['
        } else if (listBody.charAt(0) === '[') {
            listBody = listBody.slice(1); // remove '['
        } else {
            throw new Error('Error: atom list expected, found \'' + listBody + '\'');
        }
        params.atomList = new Struct.AtomList({
            notList: negated,
            ids: labelsListToIds(listBody.split(','))
        });
        params.label = 'L#';
    }

    // everything after the six positional tokens is an optional KEY=VALUE property
    tokens.slice(6).forEach(function (token) {
        var pair = splitonce(token, '=');
        var key = pair[0];
        var value = pair[1];

        if (key in utils.fmtInfo.v30atomPropMap) {
            var ival = utils.parseDecimalInt(value);
            if (key === 'VAL') {
                if (ival == 0)
                    return; // VAL=0: the property is not set at all
                if (ival == -1)
                    ival = 0; // VAL=-1 is stored as 0
            }
            params[utils.fmtInfo.v30atomPropMap[key]] = ival;
        } else if (key === 'RGROUPS') {
            // drop the surrounding parentheses; the first entry of the list is skipped
            var inner = value.trim().substr(1, value.length - 2);
            var rgNumbers = inner.split(' ').slice(1);
            // one bit per listed r-group number (bit n-1 for r-group n)
            params.rglabel = rgNumbers.reduce(function (mask, rgNumber) {
                return mask | (1 << (rgNumber - 1));
            }, 0);
        } else if (key === 'ATTCHPT') {
            params.attpnt = value.trim() - 0;
        }
    });

    return new Struct.Atom(params);
}
|
|
|
|
/**
 * Builds a Struct.Bond from one logical line of a V3000 bond block.
 *
 * Token [1] selects the bond type through utils.fmtInfo.bondTypeMap, tokens
 * [2] and [3] are the begin/end atom numbers (stored decremented by one).
 * Tokens from index 4 on are KEY=VALUE properties; CFG, TOPO, RXCTR and
 * STBOX are recognized, anything else is ignored.
 *
 * @param {string} line bond line to parse (callers in this file pass it with
 *     the V30 prefix already removed)
 * @returns {Struct.Bond} bond created from the collected parameters
 */
function parseBondLineV3000(line) {
    /* reader */
    var tokens = spaceparsplit(line);
    var bondParams = {
        begin: utils.parseDecimalInt(tokens[2]) - 1,
        end: utils.parseDecimalInt(tokens[3]) - 1,
        type: utils.fmtInfo.bondTypeMap[utils.parseDecimalInt(tokens[1])]
    };

    tokens.slice(4).forEach(function (token) {
        var pair = splitonce(token, '=');
        var value = pair[1];

        switch (pair[0]) {
        case 'CFG':
            bondParams.stereo = utils.fmtInfo.v30bondStereoMap[utils.parseDecimalInt(value)];
            // an "either" configuration on a double bond is stored as cis/trans
            if (bondParams.type == Struct.Bond.PATTERN.TYPE.DOUBLE &&
                bondParams.stereo == Struct.Bond.PATTERN.STEREO.EITHER)
                bondParams.stereo = Struct.Bond.PATTERN.STEREO.CIS_TRANS;
            break;
        case 'TOPO':
            bondParams.topology = utils.fmtInfo.bondTopologyMap[utils.parseDecimalInt(value)];
            break;
        case 'RXCTR':
            bondParams.reactingCenterStatus = utils.parseDecimalInt(value);
            break;
        case 'STBOX':
            bondParams.stereoCare = utils.parseDecimalInt(value);
            break;
        default:
            break;
        }
    });

    return new Struct.Bond(bondParams);
}
|
|
|
|
/**
 * Skips over a COLLECTION block; its content is currently not interpreted.
 *
 * @param {Struct} ctab structure being read (unused here)
 * @param {string[]} ctabLines all lines of the connection table
 * @param {number} shift index of the line that opens the block; scanning
 *     starts on the line after it
 * @returns {number} index of the line following 'M  V30 END COLLECTION'
 * @throws {Error} if the lines end before the closing tag is found
 */
function v3000parseCollection(ctab, ctabLines, shift) {
    /* reader */
    // V3000 tag lines start with 'M', two spaces, 'V30', one space
    var END_COLLECTION = 'M  V30 END COLLECTION';

    for (var pos = shift + 1; pos < ctabLines.length; ++pos) {
        if (ctabLines[pos].trim() === END_COLLECTION)
            return pos + 1;
    }
    // previously this ran past the array and failed with a TypeError
    throw new Error('Collection declaration incomplete.');
}
|
|
|
|
/**
 * Reads the records of an SGROUP block and loads each S-group into `ctab`.
 *
 * Every record is split with splitSGroupDef: field [0] is the S-group number,
 * [1] its type, [2] its label; the remaining fields are NAME=VALUE properties
 * (a name may repeat, values are collected per name).
 *
 * @param {Struct} ctab structure passed on to sGroup.loadSGroup
 * @param {string[]} ctabLines all lines of the connection table
 * @param {Object} sgroups receives every created S-group, keyed by its number
 * @param {Object} atomMap passed on to sGroup.loadSGroup
 * @param {number} shift index of the line that opens the block; reading
 *     starts on the line after it
 * @returns {number} index of the line following 'END SGROUP'
 * @throws {Error} if a property field is rejected or the lines end before
 *     'END SGROUP' is found
 */
function v3000parseSGroup(ctab, ctabLines, sgroups, atomMap, shift) { // eslint-disable-line max-params, max-statements
    /* reader */
    var cursor = shift + 1;

    while (cursor < ctabLines.length) {
        var record = stripV30(ctabLines[cursor++]).trim();
        if (record === 'END SGROUP')
            return cursor;

        // a trailing '-' means the record continues on the next line
        while (record.charAt(record.length - 1) === '-')
            record = (record.slice(0, -1) + stripV30(ctabLines[cursor++])).trim();

        var fields = splitSGroupDef(record);
        var sgType = fields[1];
        var sg = new Struct.SGroup(sgType);
        sg.number = fields[0] - 0;
        sg.type = sgType;
        sg.label = fields[2] - 0;
        sgroups[sg.number] = sg;

        // collect NAME=VALUE fields; repeated names accumulate their values
        var props = {};
        for (var f = 3; f < fields.length; ++f) {
            var pair = splitonce(fields[f], '=');
            if (pair.length !== 2)
                throw new Error('A record of form AAA=BBB or AAA=(...) expected, got \'' + fields[f] + '\'');
            var propName = pair[0];
            if (!(propName in props))
                props[propName] = [];
            props[propName].push(pair[1]);
        }

        // atom/bond numbers are shifted by -1 while reading
        sg.atoms = parseBracedNumberList(props.ATOMS[0], -1);
        if (props.PATOMS)
            sg.patoms = parseBracedNumberList(props.PATOMS[0], -1);
        if (props.BONDS)
            sg.bonds = parseBracedNumberList(props.BONDS[0], -1);
        else
            sg.bonds = [];

        var bracketDefs = props.BRKXYZ;
        sg.brkxyz = [];
        if (bracketDefs) {
            for (var b = 0; b < bracketDefs.length; ++b)
                sg.brkxyz.push(parseBracedNumberList(bracketDefs[b]));
        }

        // LABEL is applied after MULT, so it wins when both are present
        if (props.MULT)
            sg.data.subscript = props.MULT[0] - 0;
        if (props.LABEL)
            sg.data.subscript = props.LABEL[0].trim();
        if (props.CONNECT)
            sg.data.connectivity = props.CONNECT[0].toLowerCase();
        if (props.FIELDDISP)
            sGroup.applyDataSGroupInfo(sg, stripQuotes(props.FIELDDISP[0]));
        if (props.FIELDDATA)
            sGroup.applyDataSGroupData(sg, props.FIELDDATA[0], true);
        if (props.FIELDNAME)
            sGroup.applyDataSGroupName(sg, props.FIELDNAME[0]);
        if (props.QUERYTYPE)
            sGroup.applyDataSGroupQuery(sg, props.QUERYTYPE[0]);
        if (props.QUERYOP)
            sGroup.applyDataSGroupQueryOp(sg, props.QUERYOP[0]);

        sGroup.loadSGroup(ctab, sg, atomMap);
    }

    throw new Error('S-group declaration incomplete.');
}
|
|
|
|
/**
 * Parses a V3000 connection table into a Struct.
 *
 * Expected layout: BEGIN CTAB, COUNTS, then optionally an ATOM block which
 * may be followed by a BOND block and any number of COLLECTION / SGROUP
 * blocks, and finally END CTAB. Tag lines use the V3000 prefix 'M', two
 * spaces, 'V30', one space; the offsets used on the COUNTS line are derived
 * from the tag literal so the two cannot drift apart.
 *
 * @param {string[]} ctabLines lines starting at 'M  V30 BEGIN CTAB'
 * @param {*} norgroups when truthy, the lines after END CTAB are not passed
 *     to readRGroups3000
 * @returns {Struct} the parsed structure
 * @throws {Error} 'CTAB V3000 invalid' on an unexpected or missing line
 *     (including input that ends early); errors from the line parsers
 *     propagate unchanged
 */
function parseCTabV3000(ctabLines, norgroups) { // eslint-disable-line max-statements
    /* reader */
    var INVALID = 'CTAB V3000 invalid';
    var COUNTS_TAG = 'M  V30 COUNTS';
    var END_CTAB = 'M  V30 END CTAB';

    var ctab = new Struct();
    var shift = 0;

    // Bounds-checked access: running out of lines is a format error, not a TypeError.
    function lineAt(index) {
        if (index >= ctabLines.length)
            throw new Error(INVALID);
        return ctabLines[index];
    }

    // Reads the records of one BEGIN/END section and hands each to addRecord.
    function readSection(endTag, addRecord) {
        shift++; // skip the BEGIN line
        while (shift < ctabLines.length) {
            var record = stripV30(ctabLines[shift++]).trim();
            if (record === endTag)
                return;
            // a trailing '-' means the record continues on the next line
            while (record.charAt(record.length - 1) === '-')
                record = (record.substring(0, record.length - 1) + stripV30(lineAt(shift++))).trim();
            addRecord(record);
        }
    }

    if (lineAt(shift++).trim() !== 'M  V30 BEGIN CTAB')
        throw new Error(INVALID);
    if (lineAt(shift).slice(0, COUNTS_TAG.length) !== COUNTS_TAG)
        throw new Error(INVALID);
    // the counts follow the tag and one separating space; the fifth one is the chiral flag
    var counts = ctabLines[shift].slice(COUNTS_TAG.length + 1).split(' ');
    ctab.isChiral = (utils.parseDecimalInt(counts[4]) == 1);
    shift++;

    if (lineAt(shift).trim() === 'M  V30 BEGIN ATOM') {
        readSection('END ATOM', function (record) {
            ctab.atoms.add(parseAtomLineV3000(record));
        });

        if (lineAt(shift).trim() === 'M  V30 BEGIN BOND') {
            readSection('END BOND', function (record) {
                ctab.bonds.add(parseBondLineV3000(record));
            });
        }

        // TODO: let sections follow in arbitrary order
        var sgroups = {};
        var atomMap = {};

        while (lineAt(shift).trim() !== END_CTAB) {
            var tag = ctabLines[shift].trim();
            if (tag === 'M  V30 BEGIN COLLECTION') {
                // TODO: read collection information
                shift = v3000parseCollection(ctab, ctabLines, shift);
            } else if (tag === 'M  V30 BEGIN SGROUP') {
                shift = v3000parseSGroup(ctab, ctabLines, sgroups, atomMap, shift);
            } else {
                throw new Error(INVALID);
            }
        }
    }

    if (lineAt(shift++).trim() !== END_CTAB)
        throw new Error(INVALID);

    if (!norgroups)
        readRGroups3000(ctab, ctabLines.slice(shift));

    return ctab;
}
|
|
|
|
/**
 * Reads consecutive RGROUP blocks and merges their fragments into `ctab`.
 *
 * Each block starts with 'M  V30 BEGIN RGROUP <id>', may contain RLOGIC
 * lines, holds one or more CTAB sections and ends with 'M  V30 END RGROUP'.
 * Reading stops at the first line that does not open an RGROUP block.
 *
 * @param {Struct} ctab structure the r-group fragments are merged into
 * @param {string[]} ctabLines lines following the main connection table
 * @returns {undefined} nothing; `ctab` is modified in place
 * @throws {Error} 'CTAB V3000 invalid' on an unexpected line or when the
 *     lines end inside a block; errors from parseCTabV3000 propagate
 */
function readRGroups3000(ctab, /* string[] */ ctabLines) { // eslint-disable-line max-statements
    /* reader */
    var INVALID = 'CTAB V3000 invalid';
    var RLOGIC_TAG = 'M  V30 RLOGIC';
    var rfrags = {};
    var rLogic = {};
    var shift = 0;

    // Bounds-checked access: running out of lines is a format error, not a TypeError.
    function lineAt(index) {
        if (index >= ctabLines.length)
            throw new Error(INVALID);
        return ctabLines[index];
    }

    // Turns the text after the RLOGIC tag into the options object given to Struct.RGroup.
    function parseRLogic(text) {
        var parts = text.trim().split(/\s+/);
        var ifThen = utils.parseDecimalInt(parts[0]);
        var restH = utils.parseDecimalInt(parts[1]);
        var logic = {};
        if (ifThen > 0)
            logic.ifthen = ifThen;
        logic.resth = restH == 1;
        logic.range = parts.slice(2).join(' ');
        return logic;
    }

    // Wraps one parsed fragment into r-group `rgid` and merges it into ctab.
    function mergeFragment(rgid, rg) {
        rg.rgroups.set(rgid, new Struct.RGroup(rLogic[rgid]));
        var frid = rg.frags.add({});
        rg.rgroups.get(rgid).frags.add(frid);
        rg.atoms.each(function (aid, atom) {
            atom.fragment = frid;
        });
        rg.mergeInto(ctab);
    }

    while (shift < ctabLines.length && ctabLines[shift].indexOf('M  V30 BEGIN RGROUP') === 0) {
        // the id is the last token of the BEGIN line; trim so trailing blanks do not hide it
        var id = ctabLines[shift++].trim().split(' ').pop();
        rfrags[id] = [];
        rLogic[id] = {};

        for (;;) {
            var line = lineAt(shift).trim();
            if (line.indexOf(RLOGIC_TAG) === 0) {
                // cut exactly the tag, whatever its length
                rLogic[id] = parseRLogic(line.slice(RLOGIC_TAG.length));
                shift++;
                continue; // eslint-disable-line no-continue
            }
            if (line !== 'M  V30 BEGIN CTAB')
                throw new Error(INVALID);

            // locate the matching END CTAB without running past the input
            var end = shift;
            while (lineAt(end).trim() !== 'M  V30 END CTAB')
                end++;

            rfrags[id].push(parseCTabV3000(ctabLines.slice(shift, end + 1), true));
            shift = end + 1;

            if (lineAt(shift).trim() === 'M  V30 END RGROUP') {
                shift++;
                break;
            }
        }
    }

    for (var rgid in rfrags) {
        for (var j = 0; j < rfrags[rgid].length; ++j)
            mergeFragment(rgid, rfrags[rgid][j]);
    }
}
|
|
|
|
/**
 * Parses a V3000 reaction file into a single merged Struct.
 *
 * The first four lines are skipped; the next line is the counts line, whose
 * tokens after the first three give the number of reactants, products and
 * (optionally) agents. CTAB sections found inside REACTANT / PRODUCT blocks
 * are parsed individually and merged with utils.rxnMerge; RGROUP blocks are
 * collected and read afterwards with readRGroups3000.
 *
 * @param {string[]} ctabLines all lines of the reaction file
 * @returns {Struct} the merged reaction structure
 * @throws {Error} 'CTab format invalid' when the counts line or a closing
 *     END CTAB / END RGROUP tag is missing, or when a CTAB appears outside a
 *     REACTANT / PRODUCT block; 'line unrecognized: ...' for any other
 *     unknown line; errors from the CTAB parser propagate
 */
function parseRxn3000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
    /* reader */
    var INVALID = 'CTab format invalid';
    var lines = ctabLines.slice(4); // the counts line is the fifth line of the file
    if (lines.length === 0)
        throw new Error(INVALID);

    var countsSplit = lines[0].split(/\s+/g).slice(3);
    var nReactants = countsSplit[0] - 0;
    var nProducts = countsSplit[1] - 0;
    var nAgents = countsSplit.length > 2 ? countsSplit[2] - 0 : 0;

    // Index of the first line at or after `from` equal to `marker`.
    // Throws instead of logging and returning undefined, which used to
    // truncate the remaining input silently.
    function findEnd(from, marker) {
        for (var k = from; k < lines.length; ++k) {
            if (lines[k].trim() === marker)
                return k;
        }
        throw new Error(INVALID);
    }

    var molLinesReactants = [];
    var molLinesProducts = [];
    var current = null; // list that receives the CTABs of the open block
    var rGroups = [];
    var j;

    for (var i = 0; i < lines.length; ++i) {
        var line = lines[i].trim();

        if (line.startsWith('M  V30 COUNTS')) {
            // counts were already taken from the first line
        } else if (line === 'M  END') {
            break; // stop reading
        } else if (line === 'M  V30 BEGIN PRODUCT') {
            console.assert(current == null, 'CTab format invalid');
            current = molLinesProducts;
        } else if (line === 'M  V30 END PRODUCT') {
            console.assert(current === molLinesProducts, 'CTab format invalid');
            current = null;
        } else if (line === 'M  V30 BEGIN REACTANT') {
            console.assert(current == null, 'CTab format invalid');
            current = molLinesReactants;
        } else if (line === 'M  V30 END REACTANT') {
            console.assert(current === molLinesReactants, 'CTab format invalid');
            current = null;
        } else if (line.startsWith('M  V30 BEGIN RGROUP')) {
            console.assert(current == null, 'CTab format invalid');
            j = findEnd(i, 'M  V30 END RGROUP');
            rGroups.push(lines.slice(i, j + 1));
            i = j;
        } else if (line === 'M  V30 BEGIN CTAB') {
            // a CTAB outside REACTANT/PRODUCT used to fail on null.push
            if (current === null)
                throw new Error(INVALID);
            j = findEnd(i, 'M  V30 END CTAB');
            current.push(lines.slice(i, j + 1));
            i = j;
        } else {
            throw new Error('line unrecognized: ' + line);
        }
    }

    // r-groups are read once for the whole reaction below, so skip them per molecule
    var mols = molLinesReactants.concat(molLinesProducts).map(function (molLines) {
        return parseCTabV3000(molLines, true);
    });
    var ctab = utils.rxnMerge(mols, nReactants, nProducts, nAgents);

    // flatten the per-block line arrays into one list of lines
    readRGroups3000(ctab, Array.prototype.concat.apply([], rGroups));

    return ctab;
}
|
|
|
|
/**
 * Splits a line on spaces that are neither inside parentheses nor inside
 * double quotes. Empty tokens (from leading, trailing or repeated spaces)
 * are dropped. Parentheses are counted even inside quotes.
 *
 * @param {string} line text to tokenize
 * @returns {string[]} the tokens in order of appearance
 */
function spaceparsplit(line) { // eslint-disable-line max-statements
    /* reader */
    var tokens = [];
    var depth = 0; // current parenthesis nesting
    var insideQuotes = false;
    var tokenStart = 0; // index where the pending token begins
    var pos;

    for (pos = 0; pos < line.length; ++pos) {
        switch (line[pos]) {
        case '(':
            depth++;
            break;
        case ')':
            depth--;
            break;
        case '"':
            insideQuotes = !insideQuotes;
            break;
        case ' ':
            if (!insideQuotes && depth === 0) {
                if (pos > tokenStart)
                    tokens.push(line.slice(tokenStart, pos));
                tokenStart = pos + 1;
            }
            break;
        default:
            break;
        }
    }

    // flush whatever follows the last separator
    if (pos > tokenStart)
        tokens.push(line.slice(tokenStart, pos));

    return tokens;
}
|
|
|
|
// utils
|
|
/**
 * Removes one pair of enclosing double quotes, if present.
 *
 * @param {string} str text that may be wrapped in double quotes
 * @returns {string} the text between the quotes, or `str` unchanged when it
 *     does not both start and end with '"'
 */
function stripQuotes(str) {
    var last = str.length - 1;
    var wrapped = str[0] === '"' && str[last] === '"';
    return wrapped ? str.slice(1, last) : str;
}
|
|
|
|
/**
 * Splits a string at the first occurrence of a delimiter.
 *
 * @param {string} line text to split
 * @param {string} delim delimiter to look for
 * @returns {string[]} [head, tail] when the delimiter is present (tail may
 *     itself contain further delimiters); [line] when it is absent, so that
 *     callers checking for a two-element result can detect a malformed
 *     record
 */
function splitonce(line, delim) {
    /* reader */
    var at = line.indexOf(delim);
    // indexOf yields -1 when absent; slicing with it would silently drop the last character
    if (at === -1)
        return [line];
    return [line.slice(0, at), line.slice(at + delim.length)];
}
|
|
|
|
/**
 * Splits an S-group record into fields on spaces that are outside both
 * parentheses and double quotes. After each split the remainder is trimmed,
 * so repeated separators do not create empty fields.
 *
 * @param {string} line S-group record without the V30 prefix
 * @returns {string[]} the fields in order of appearance
 * @throws {Error} if parentheses are unbalanced at the end of the record
 */
function splitSGroupDef(line) { // eslint-disable-line max-statements
    /* reader */
    var fields = [];
    var rest = line;
    var depth = 0; // parenthesis nesting, only counted outside quotes
    var inQuotes = false;
    var pos = 0;

    while (pos < rest.length) {
        var ch = rest.charAt(pos);
        if (ch === '"') {
            inQuotes = !inQuotes;
        } else if (!inQuotes) {
            if (ch === '(') {
                depth++;
            } else if (ch === ')') {
                depth--;
            } else if (ch === ' ' && depth === 0) {
                fields.push(rest.slice(0, pos));
                rest = rest.slice(pos + 1).trim();
                // rescan from the very first character of the remainder, so a
                // field that starts with '(' or '"' is tracked correctly
                pos = 0;
                continue; // eslint-disable-line no-continue
            }
        }
        pos++;
    }

    if (depth !== 0)
        throw new Error('Brace balance broken. S-group properies invalid!');
    if (rest.length > 0)
        fields.push(rest.trim());
    return fields;
}
|
|
|
|
/**
 * Parses a parenthesized list of the form '(N v1 v2 ...)' into numbers.
 *
 * The first entry is skipped and the remaining entries are read as base-10
 * integers (a fractional part is dropped by parseInt); entries that are not
 * numbers are ignored. Entries may be separated by any run of whitespace.
 *
 * @param {string} line the list including its parentheses
 * @param {number} [shift=0] value added to every parsed number
 * @returns {?number[]} the shifted numbers, or null when `line` is empty or
 *     missing
 */
function parseBracedNumberList(line, shift) {
    /* reader */
    if (!line)
        return null;

    var offset = shift || 0;
    // drop the enclosing parentheses, then tokenize on whitespace runs so that
    // padding cannot move the skipped first entry
    var inner = line.trim().slice(1, -1).trim();
    var tokens = inner.split(/\s+/);
    var list = [];

    for (var i = 1; i < tokens.length; ++i) {
        var value = parseInt(tokens[i], 10);
        if (!isNaN(value))
            list.push(value + offset);
    }

    return list;
}
|
|
|
|
/**
 * Removes the V3000 line prefix from a line.
 *
 * The prefix is 'M', two spaces, 'V30', one space (7 characters). The
 * comparison and the cut both use the literal's own length, so they cannot
 * disagree with each other.
 *
 * @param {string} line raw line of a V3000 block
 * @returns {string} the line content after the prefix
 * @throws {Error} 'Prefix invalid' if the line does not start with the prefix
 */
function stripV30(line) {
    /* reader */
    var prefix = 'M  V30 ';
    if (line.slice(0, prefix.length) !== prefix)
        throw new Error('Prefix invalid');
    return line.slice(prefix.length);
}
|
|
|
|
/**
 * Looks up every label in element.map.
 *
 * @param {string[]} labels labels to look up; surrounding whitespace is
 *     ignored
 * @returns {Array} the element.map entry for each label, in input order
 *     (undefined for a label that has no entry)
 */
function labelsListToIds(labels) {
    /* reader */
    return labels.map(function (label) {
        return element.map[label.trim()];
    });
}
|
|
|
|
// Public interface of the V3000 reader: the CTAB parser, the r-group reader
// and the reaction parser.
var v3000Reader = {};
v3000Reader.parseCTabV3000 = parseCTabV3000;
v3000Reader.readRGroups3000 = readRGroups3000;
v3000Reader.parseRxn3000 = parseRxn3000;

module.exports = v3000Reader;
|