/**************************************************************************** * Copyright 2017 EPAM Systems * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ var Vec2 = require('../../util/vec2'); var element = require('./../element'); var Struct = require('./../struct/index'); var sGroup = require('./parseSGroup'); var utils = require('./utils'); function parseAtomLineV3000(line) { // eslint-disable-line max-statements /* reader */ var split, subsplit, key, value, i; split = spaceparsplit(line); var params = { pp: new Vec2(parseFloat(split[2]), -parseFloat(split[3]), parseFloat(split[4])), aam: split[5].trim() }; var label = split[1].trim(); if (label.charAt(0) == '"' && label.charAt(label.length - 1) == '"') label = label.substr(1, label.length - 2); // strip qutation marks if (label.charAt(label.length - 1) == ']') { // assume atom list label = label.substr(0, label.length - 1); // remove ']' var atomListParams = {}; atomListParams.notList = false; if (label.substr(0, 5) == 'NOT [') { atomListParams.notList = true; label = label.substr(5); // remove 'NOT [' } else if (label.charAt(0) != '[') { throw new Error('Error: atom list expected, found \'' + label + '\''); } else { label = label.substr(1); // remove '[' } atomListParams.ids = labelsListToIds(label.split(',')); params['atomList'] = new Struct.AtomList(atomListParams); params['label'] = 'L#'; } else { params['label'] = label; } split.splice(0, 6); for (i = 0; i < split.length; ++i) { subsplit = splitonce(split[i], '='); key = subsplit[0]; value = subsplit[1]; if (key in utils.fmtInfo.v30atomPropMap) { var ival = utils.parseDecimalInt(value); if (key == 'VAL') { if (ival == 0) continue; // eslint-disable-line no-continue if (ival == -1) ival = 0; } params[utils.fmtInfo.v30atomPropMap[key]] = ival; } else if (key == 'RGROUPS') { value = value.trim().substr(1, value.length - 2); var rgrsplit = value.split(' ').slice(1); params.rglabel = 0; for (var j = 0; j < rgrsplit.length; ++j) params.rglabel |= 1 << (rgrsplit[j] - 1); } else if (key == 'ATTCHPT') { params.attpnt = value.trim() - 0; } } return new Struct.Atom(params); } function parseBondLineV3000(line) { /* reader */ var split, subsplit, key, value, i; split = spaceparsplit(line); var params = { begin: utils.parseDecimalInt(split[2]) - 1, end: utils.parseDecimalInt(split[3]) - 1, type: utils.fmtInfo.bondTypeMap[utils.parseDecimalInt(split[1])] }; split.splice(0, 4); for (i = 0; i < split.length; ++i) { subsplit = splitonce(split[i], '='); key = subsplit[0]; value = subsplit[1]; if (key == 'CFG') { params.stereo = utils.fmtInfo.v30bondStereoMap[utils.parseDecimalInt(value)]; if (params.type == Struct.Bond.PATTERN.TYPE.DOUBLE && params.stereo == Struct.Bond.PATTERN.STEREO.EITHER) params.stereo = Struct.Bond.PATTERN.STEREO.CIS_TRANS; } else if (key == 'TOPO') { params.topology = utils.fmtInfo.bondTopologyMap[utils.parseDecimalInt(value)]; } else if (key == 'RXCTR') { params.reactingCenterStatus = utils.parseDecimalInt(value); } else if (key == 'STBOX') { params.stereoCare = utils.parseDecimalInt(value); } } return new Struct.Bond(params); } function v3000parseCollection(ctab, ctabLines, shift) { /* reader */ shift++; while (ctabLines[shift].trim() != 'M V30 END COLLECTION') shift++; shift++; return shift; } function v3000parseSGroup(ctab, ctabLines, sgroups, atomMap, shift) { // eslint-disable-line max-params, max-statements /* reader */ var line = ''; shift++; while (shift < ctabLines.length) { line = stripV30(ctabLines[shift++]).trim(); if (line.trim() == 'END SGROUP') return shift; while (line.charAt(line.length - 1) == '-') line = (line.substr(0, line.length - 1) + stripV30(ctabLines[shift++])).trim(); var split = splitSGroupDef(line); var type = split[1]; var sg = new Struct.SGroup(type); sg.number = split[0] - 0; sg.type = type; sg.label = split[2] - 0; sgroups[sg.number] = sg; var props = {}; for (var i = 3; i < split.length; ++i) { var subsplit = splitonce(split[i], '='); if (subsplit.length != 2) throw new Error('A record of form AAA=BBB or AAA=(...) expected, got \'' + split[i] + '\''); var name = subsplit[0]; if (!(name in props)) props[name] = []; props[name].push(subsplit[1]); } sg.atoms = parseBracedNumberList(props['ATOMS'][0], -1); if (props['PATOMS']) sg.patoms = parseBracedNumberList(props['PATOMS'][0], -1); sg.bonds = props['BONDS'] ? parseBracedNumberList(props['BONDS'][0], -1) : []; var brkxyzStrs = props['BRKXYZ']; sg.brkxyz = []; if (brkxyzStrs) { for (var j = 0; j < brkxyzStrs.length; ++j) sg.brkxyz.push(parseBracedNumberList(brkxyzStrs[j])); } if (props['MULT']) sg.data.subscript = props['MULT'][0] - 0; if (props['LABEL']) sg.data.subscript = props['LABEL'][0].trim(); if (props['CONNECT']) sg.data.connectivity = props['CONNECT'][0].toLowerCase(); if (props['FIELDDISP']) sGroup.applyDataSGroupInfo(sg, stripQuotes(props['FIELDDISP'][0])); if (props['FIELDDATA']) sGroup.applyDataSGroupData(sg, props['FIELDDATA'][0], true); if (props['FIELDNAME']) sGroup.applyDataSGroupName(sg, props['FIELDNAME'][0]); if (props['QUERYTYPE']) sGroup.applyDataSGroupQuery(sg, props['QUERYTYPE'][0]); if (props['QUERYOP']) sGroup.applyDataSGroupQueryOp(sg, props['QUERYOP'][0]); sGroup.loadSGroup(ctab, sg, atomMap); } throw new Error('S-group declaration incomplete.'); } function parseCTabV3000(ctabLines, norgroups) { // eslint-disable-line max-statements /* reader */ var ctab = new Struct(); var shift = 0; if (ctabLines[shift++].trim() != 'M V30 BEGIN CTAB') throw Error('CTAB V3000 invalid'); if (ctabLines[shift].slice(0, 13) != 'M V30 COUNTS') throw Error('CTAB V3000 invalid'); var vals = ctabLines[shift].slice(14).split(' '); ctab.isChiral = (utils.parseDecimalInt(vals[4]) == 1); shift++; if (ctabLines[shift].trim() == 'M V30 BEGIN ATOM') { shift++; var line; while (shift < ctabLines.length) { line = stripV30(ctabLines[shift++]).trim(); if (line == 'END ATOM') break; while (line.charAt(line.length - 1) == '-') line = (line.substring(0, line.length - 1) + stripV30(ctabLines[shift++])).trim(); ctab.atoms.add(parseAtomLineV3000(line)); } if (ctabLines[shift].trim() == 'M V30 BEGIN BOND') { shift++; while (shift < ctabLines.length) { line = stripV30(ctabLines[shift++]).trim(); if (line == 'END BOND') break; while (line.charAt(line.length - 1) == '-') line = (line.substring(0, line.length - 1) + stripV30(ctabLines[shift++])).trim(); ctab.bonds.add(parseBondLineV3000(line)); } } // TODO: let sections follow in arbitrary order var sgroups = {}; var atomMap = {}; while (ctabLines[shift].trim() != 'M V30 END CTAB') { if (ctabLines[shift].trim() == 'M V30 BEGIN COLLECTION') // TODO: read collection information shift = v3000parseCollection(ctab, ctabLines, shift); else if (ctabLines[shift].trim() == 'M V30 BEGIN SGROUP') shift = v3000parseSGroup(ctab, ctabLines, sgroups, atomMap, shift); else throw Error('CTAB V3000 invalid'); } } if (ctabLines[shift++].trim() != 'M V30 END CTAB') throw Error('CTAB V3000 invalid'); if (!norgroups) readRGroups3000(ctab, ctabLines.slice(shift)); return ctab; } function readRGroups3000(ctab, /* string */ ctabLines) /* Struct */ { // eslint-disable-line max-statements /* reader */ var rfrags = {}; var rLogic = {}; var shift = 0; while (shift < ctabLines.length && ctabLines[shift].search('M V30 BEGIN RGROUP') == 0) { var id = ctabLines[shift++].split(' ').pop(); rfrags[id] = []; rLogic[id] = {}; while (true) { // eslint-disable-line no-constant-condition var line = ctabLines[shift].trim(); if (line.search('M V30 RLOGIC') == 0) { line = line.slice(13); var rlsplit = line.trim().split(/\s+/g); var iii = utils.parseDecimalInt(rlsplit[0]); var hhh = utils.parseDecimalInt(rlsplit[1]); var ooo = rlsplit.slice(2).join(' '); var logic = {}; if (iii > 0) logic.ifthen = iii; logic.resth = hhh == 1; logic.range = ooo; rLogic[id] = logic; shift++; continue; // eslint-disable-line no-continue } if (line != 'M V30 BEGIN CTAB') throw Error('CTAB V3000 invalid'); for (var i = 0; i < ctabLines.length; ++i) { if (ctabLines[shift + i].trim() == 'M V30 END CTAB') break; } var lines = ctabLines.slice(shift, shift + i + 1); var rfrag = parseCTabV3000(lines, true); rfrags[id].push(rfrag); shift = shift + i + 1; if (ctabLines[shift].trim() == 'M V30 END RGROUP') { shift++; break; } } } for (var rgid in rfrags) { for (var j = 0; j < rfrags[rgid].length; ++j) { var rg = rfrags[rgid][j]; rg.rgroups.set(rgid, new Struct.RGroup(rLogic[rgid])); var frid = rg.frags.add({}); rg.rgroups.get(rgid).frags.add(frid); rg.atoms.each(function (aid, atom) { atom.fragment = frid; }); rg.mergeInto(ctab); } } } function parseRxn3000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements /* reader */ ctabLines = ctabLines.slice(4); var countsSplit = ctabLines[0].split(/\s+/g).slice(3); var nReactants = countsSplit[0] - 0, nProducts = countsSplit[1] - 0, nAgents = countsSplit.length > 2 ? countsSplit[2] - 0 : 0; function findCtabEnd(i) { for (var j = i; j < ctabLines.length; ++j) { if (ctabLines[j].trim() == 'M V30 END CTAB') return j; } console.error('CTab format invalid'); } function findRGroupEnd(i) { for (var j = i; j < ctabLines.length; ++j) { if (ctabLines[j].trim() == 'M V30 END RGROUP') return j; } console.error('CTab format invalid'); } var molLinesReactants = []; var molLinesProducts = []; var current = null; var rGroups = []; for (var i = 0; i < ctabLines.length; ++i) { var line = ctabLines[i].trim(); var j; if (line.startsWith('M V30 COUNTS')) { // do nothing } else if (line == 'M END') { break; // stop reading } else if (line == 'M V30 BEGIN PRODUCT') { console.assert(current == null, 'CTab format invalid'); current = molLinesProducts; } else if (line == 'M V30 END PRODUCT') { console.assert(current === molLinesProducts, 'CTab format invalid'); current = null; } else if (line == 'M V30 BEGIN REACTANT') { console.assert(current == null, 'CTab format invalid'); current = molLinesReactants; } else if (line == 'M V30 END REACTANT') { console.assert(current === molLinesReactants, 'CTab format invalid'); current = null; } else if (line.startsWith('M V30 BEGIN RGROUP')) { console.assert(current == null, 'CTab format invalid'); j = findRGroupEnd(i); rGroups.push(ctabLines.slice(i, j + 1)); i = j; } else if (line == 'M V30 BEGIN CTAB') { j = findCtabEnd(i); current.push(ctabLines.slice(i, j + 1)); i = j; } else { throw new Error('line unrecognized: ' + line); } } var mols = []; var molLines = molLinesReactants.concat(molLinesProducts); for (j = 0; j < molLines.length; ++j) { var mol = parseCTabV3000(molLines[j], countsSplit); mols.push(mol); } var ctab = utils.rxnMerge(mols, nReactants, nProducts, nAgents); readRGroups3000(ctab, function (array) { var res = []; for (var k = 0; k < array.length; ++k) res = res.concat(array[k]); return res; }(rGroups)); return ctab; } // split a line by spaces outside parentheses function spaceparsplit(line) { // eslint-disable-line max-statements /* reader */ var split = []; var pc = 0; var c; var i; var i0 = -1; var quoted = false; for (i = 0; i < line.length; ++i) { c = line[i]; if (c == '(') pc++; else if (c == ')') pc--; if (c == '"') quoted = !quoted; if (!quoted && line[i] == ' ' && pc == 0) { if (i > i0 + 1) split.push(line.slice(i0 + 1, i)); i0 = i; } } if (i > i0 + 1) split.push(line.slice(i0 + 1, i)); return split; } // utils function stripQuotes(str) { if (str[0] === '"' && str[str.length - 1] === '"') return str.substr(1, str.length - 2); return str; } function splitonce(line, delim) { /* reader */ var p = line.indexOf(delim); return [line.slice(0, p), line.slice(p + 1)]; } function splitSGroupDef(line) { // eslint-disable-line max-statements /* reader */ var split = []; var braceBalance = 0; var quoted = false; for (var i = 0; i < line.length; ++i) { var c = line.charAt(i); if (c == '"') { quoted = !quoted; } else if (!quoted) { if (c == '(') { braceBalance++; } else if (c == ')') { braceBalance--; } else if (c == ' ' && braceBalance == 0) { split.push(line.slice(0, i)); line = line.slice(i + 1).trim(); i = 0; } } } if (braceBalance != 0) throw new Error('Brace balance broken. S-group properies invalid!'); if (line.length > 0) split.push(line.trim()); return split; } function parseBracedNumberList(line, shift) { /* reader */ if (!line) return null; var list = []; line = line.trim(); line = line.substr(1, line.length - 2); var split = line.split(' '); shift = shift || 0; for (var i = 1; i < split.length; ++i) { var value = parseInt(split[i]); if (!isNaN(value)) list.push(value + shift); } return list; } function stripV30(line) { /* reader */ if (line.slice(0, 7) != 'M V30 ') throw new Error('Prefix invalid'); return line.slice(7); } function labelsListToIds(labels) { /* reader */ var ids = []; for (var i = 0; i < labels.length; ++i) ids.push(element.map[labels[i].trim()]); return ids; } module.exports = { parseCTabV3000: parseCTabV3000, readRGroups3000: readRGroups3000, parseRxn3000: parseRxn3000 };