Current Dev State

This commit is contained in:
Tim Lorsbach
2025-06-23 20:13:54 +02:00
parent b4f9bb277d
commit ded50edaa2
22617 changed files with 4345095 additions and 174 deletions

View File

@ -0,0 +1,269 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Set = require('../../util/set');
var v2000 = require('./v2000');
var v3000 = require('./v3000');
var Struct = require('./../struct/index');
var utils = require('./utils');
var loadRGroupFragments = true; // TODO: set to load the fragments
/* Parse Mol */
/**
 * Parse the lines of a molfile into a Struct.
 * Files whose first line starts with `$MDL` are handed to the V2000 R-group
 * reader; otherwise the first three lines are treated as the header and the
 * remainder is parsed as a connection table.
 * @param {string[]} ctabLines - lines of the molfile
 * @returns {Struct} parsed structure (its `name` is the trimmed first line
 *   in the plain-molfile case)
 */
function parseMol(/* string[] */ ctabLines) /* Struct */ {
    /* reader */
    var firstLine = ctabLines[0];
    if (firstLine.indexOf('$MDL') === 0)
        return v2000.parseRg2000(ctabLines);
    // everything after the three header lines is the connection table
    var parsed = parseCTab(ctabLines.slice(3));
    parsed.name = firstLine.trim();
    return parsed;
}
/**
 * Parse a connection table whose first line is the counts line.
 * The version tag is taken from field 11 of the partitioned counts line and
 * selects the V2000 or V3000 reader.
 * @param {string[]} ctabLines - counts line followed by the table body
 * @returns {Struct} whatever the selected version reader returns
 * @throws {Error} when the version tag is neither 'V2000' nor 'V3000'
 */
function parseCTab(/* string[] */ ctabLines) /* Struct */ {
    /* reader */
    var countsFields = partitionLine(ctabLines[0], utils.fmtInfo.countsLinePartition);
    var version = countsFields[11].trim();
    var body = ctabLines.slice(1);
    switch (version) {
    case 'V2000':
        return v2000.parseCTabV2000(body, countsFields);
    case 'V3000':
        return v3000.parseCTabV3000(body, !loadRGroupFragments);
    default:
        throw new Error('Molfile version unknown: ' + version);
    }
}
/* Parse Rxn */
/**
 * Parse the lines of a reaction file.
 * The first line is trimmed and split on single spaces; when its second
 * token is 'V3000' the V3000 reader is used, otherwise the V2000 reader.
 * @param {string[]} ctabLines - lines of the rxn file
 * @returns {Struct} result of the selected reader
 */
function parseRxn(/* string[] */ ctabLines) /* Struct */ {
    /* reader */
    var headerTokens = ctabLines[0].trim().split(' ');
    var isV3000 = headerTokens.length > 1 && headerTokens[1] == 'V3000';
    if (!isV3000)
        return v2000.parseRxn2000(ctabLines);
    return v3000.parseRxn3000(ctabLines);
}
/* Prepare For Saving */
// Dispatch table keyed by S-group type code. Each entry is a function taking
// (sgroup, mol) that is run on an S-group before it is written out.
// MUL is delegated to Struct.SGroup; the other handlers are defined below.
var prepareForSaving = {
MUL: Struct.SGroup.prepareMulForSaving,
SRU: prepareSruForSaving,
SUP: prepareSupForSaving,
DAT: prepareDatForSaving,
GEN: prepareGenForSaving
};
/**
 * Prepare an SRU S-group for saving: collect the bonds that cross the group
 * boundary (exactly one end atom belongs to the group) into `sgroup.bonds`.
 * @param {Object} sgroup - S-group being saved; `bonds` is overwritten
 * @param {Object} mol - structure providing `bonds.each` and `atoms.get`
 * @throws {Object} plain object with numeric `id`, `error-type` and `message`
 *   when the number of crossing bonds is neither 0 nor 2 (callers test
 *   `typeof ex.id` to recognise it, so it is deliberately not an Error)
 */
function prepareSruForSaving(sgroup, mol) {
    var crossing = [];
    mol.bonds.each(function (bondId, bond) {
        var beginAtom = mol.atoms.get(bond.begin);
        var endAtom = mol.atoms.get(bond.end);
        var beginInside = Boolean(Set.contains(beginAtom.sgs, sgroup.id));
        var endInside = Boolean(Set.contains(endAtom.sgs, sgroup.id));
        // a crossing bond has exactly one end inside the group
        if (beginInside !== endInside)
            crossing.push(bondId);
    }, sgroup);
    var count = crossing.length;
    if (count !== 0 && count !== 2)
        throw { 'id': sgroup.id, 'error-type': 'cross-bond-number', 'message': 'Unsupported cross-bonds number' };
    sgroup.bonds = crossing;
}
/**
 * Prepare a SUP S-group for saving: store in `sgroup.bonds` every bond that
 * has exactly one end atom inside the group.
 * @param {Object} sgroup - S-group being saved; `bonds` is overwritten
 * @param {Object} mol - structure providing `bonds.each` and `atoms.get`
 */
function prepareSupForSaving(sgroup, mol) {
    // The same crossing-bond scan is used for SRU groups and should be moved
    // into a shared helper. It probably applies to every S-group type, but
    // that still needs to be checked.
    var boundaryBonds = [];
    var groupId = function () {
        return sgroup.id;
    };
    mol.bonds.each(function (bid, bond) {
        var first = mol.atoms.get(bond.begin);
        var second = mol.atoms.get(bond.end);
        var firstIn = Boolean(Set.contains(first.sgs, groupId()));
        var secondIn = Boolean(Set.contains(second.sgs, groupId()));
        if (firstIn ? !secondIn : secondIn)
            boundaryBonds.push(bid);
    }, sgroup);
    sgroup.bonds = boundaryBonds;
}
// GEN S-groups need no preparation before saving; this no-op exists so the
// prepareForSaving dispatch table has an entry for every supported type.
function prepareGenForSaving(sgroup, mol) { // eslint-disable-line no-unused-vars
}
/**
 * Prepare a DAT S-group for saving by refreshing `sgroup.atoms` with the
 * list returned by Struct.SGroup.getAtoms for this group.
 * @param {Object} sgroup - S-group being saved; `atoms` is overwritten
 * @param {Object} mol - structure the group belongs to
 */
function prepareDatForSaving(sgroup, mol) {
    var members = Struct.SGroup.getAtoms(mol, sgroup);
    sgroup.atoms = members;
}
/* Save To Molfile */
// Dispatch table keyed by S-group type code. Each entry is one of the
// save*ToMolfile functions below; they take (sgroup, mol, sgMap, atomMap[, bondMap])
// and return the type-specific property lines joined with '\n'.
var saveToMolfile = {
MUL: saveMulToMolfile,
SRU: saveSruToMolfile,
SUP: saveSupToMolfile,
DAT: saveDatToMolfile,
GEN: saveGenToMolfile
};
/**
 * Build the molfile property lines for a MUL S-group.
 * Emits SAL (atoms), SPA (parent atoms) and SBL (bonds) lists, the SMT line
 * carrying the multiplier, then the bracket lines.
 * @param {Object} sgroup - group with atomSet, parentAtomSet, bonds, data.mul
 * @param {Object} mol - structure the group belongs to
 * @param {Object} sgMap - S-group id -> index written to the file
 * @param {Object} atomMap - atom id -> index written to the file
 * @param {Object} bondMap - bond id -> index written to the file
 * @returns {string} lines joined with '\n'
 */
function saveMulToMolfile(sgroup, mol, sgMap, atomMap, bondMap) { // eslint-disable-line max-params
    var idstr = (sgMap[sgroup.id] + '').padStart(3);
    var sections = [
        makeAtomBondLines('SAL', idstr, Object.keys(sgroup.atomSet), atomMap), // TODO: check atomSet
        makeAtomBondLines('SPA', idstr, Object.keys(sgroup.parentAtomSet), atomMap),
        makeAtomBondLines('SBL', idstr, sgroup.bonds, bondMap),
        ['M SMT ' + idstr + ' ' + sgroup.data.mul],
        bracketsToMolfile(mol, sgroup, idstr)
    ];
    return Array.prototype.concat.apply([], sections).join('\n');
}
/**
 * Build the molfile property lines for an SRU S-group: the SAL atom list,
 * the SBL bond list and the bracket lines.
 * @param {Object} sgroup - group with atoms and bonds
 * @param {Object} mol - structure the group belongs to
 * @param {Object} sgMap - S-group id -> index written to the file
 * @param {Object} atomMap - atom id -> index written to the file
 * @param {Object} bondMap - bond id -> index written to the file
 * @returns {string} lines joined with '\n'
 */
function saveSruToMolfile(sgroup, mol, sgMap, atomMap, bondMap) { // eslint-disable-line max-params
    var idstr = (sgMap[sgroup.id] + '').padStart(3);
    var atomLines = makeAtomBondLines('SAL', idstr, sgroup.atoms, atomMap);
    var bondLines = makeAtomBondLines('SBL', idstr, sgroup.bonds, bondMap);
    var bracketLines = bracketsToMolfile(mol, sgroup, idstr);
    return atomLines.concat(bondLines, bracketLines).join('\n');
}
/**
 * Build the molfile property lines for a SUP S-group: the SAL atom list,
 * the SBL bond list and, when the group has a non-empty name, an SMT line
 * carrying that name.
 * @param {Object} sgroup - group with atoms, bonds and data.name
 * @param {Object} mol - structure the group belongs to (not used here)
 * @param {Object} sgMap - S-group id -> index written to the file
 * @param {Object} atomMap - atom id -> index written to the file
 * @param {Object} bondMap - bond id -> index written to the file
 * @returns {string} lines joined with '\n'
 */
function saveSupToMolfile(sgroup, mol, sgMap, atomMap, bondMap) { // eslint-disable-line max-params
    var idstr = (sgMap[sgroup.id] + '').padStart(3);
    var out = [].concat(
        makeAtomBondLines('SAL', idstr, sgroup.atoms, atomMap),
        makeAtomBondLines('SBL', idstr, sgroup.bonds, bondMap)
    );
    var name = sgroup.data.name;
    if (name && name != '')
        out.push('M SMT ' + idstr + ' ' + name);
    return out.join('\n');
}
/**
 * Build the molfile property lines for a DAT (data) S-group: the SAL atom
 * list, the SDT field-description line, the SDD display line and the field
 * value split into SCD (continuation) / SED (end) lines.
 *
 * Fix: the display length is now read from `data.nCharsToDisplay`, the
 * property the reader stores. The previous code read the misspelled
 * `data.nCharnCharsToDisplay`, so the length was always written as 'ALL'.
 * The misspelled key is still honoured as a fallback for legacy objects.
 * A missing `data.fieldValue` is treated as an empty string instead of
 * throwing, matching how the other optional fields are handled.
 *
 * @param {Object} sgroup - group with id, atoms, pp and data
 * @param {Object} mol - structure; used for the mass centre when the
 *   position is relative (`data.absolute` falsy)
 * @param {Object} sgMap - S-group id -> index written to the file
 * @param {Object} atomMap - atom id -> index written to the file
 * @returns {string} lines joined with '\n'
 */
function saveDatToMolfile(sgroup, mol, sgMap, atomMap) {
    var idstr = (sgMap[sgroup.id] + '').padStart(3);
    var data = sgroup.data;
    var pp = sgroup.pp;
    // relative positions are stored as an offset from the group's mass centre
    if (!data.absolute)
        pp = pp.sub(Struct.SGroup.getMassCentre(mol, sgroup.atoms));
    var lines = [];
    lines = lines.concat(makeAtomBondLines('SAL', idstr, sgroup.atoms, atomMap));
    var sdtLine = 'M SDT ' + idstr + ' ' +
        (data.fieldName || '').padEnd(30) +
        (data.fieldType || '').padStart(2) +
        (data.units || '').padEnd(20) +
        (data.query || '').padStart(2);
    if (data.queryOp) // see gitlab #184
        sdtLine += data.queryOp.padEnd(80 - 65);
    lines.push(sdtLine);
    // prefer the correctly spelled property; fall back to the legacy typo
    var nChars = data.nCharsToDisplay !== undefined ?
        data.nCharsToDisplay : data.nCharnCharsToDisplay;
    var sddLine = 'M SDD ' + idstr +
        ' ' + utils.paddedNum(pp.x, 10, 4) + utils.paddedNum(-pp.y, 10, 4) +
        ' ' + // ' eee'
        (data.attached ? 'A' : 'D') + // f
        (data.absolute ? 'A' : 'R') + // g
        (data.showUnits ? 'U' : ' ') + // h
        ' ' + // i
        (nChars >= 0 ? utils.paddedNum(nChars, 3) : 'ALL') + // jjj
        ' 1 ' + // 'kkk ll '
        (data.tagChar || ' ') + // m
        ' ' + utils.paddedNum(data.daspPos, 1) + // n
        ' '; // oo
    lines.push(sddLine);
    // drop trailing newlines, then wrap each remaining line at 69 characters
    var val = normalizeNewlines(data.fieldValue || '').replace(/\n*$/, '');
    var charsPerLine = 69;
    val.split('\n').forEach(function (chars) {
        while (chars.length > charsPerLine) {
            lines.push('M SCD ' + idstr + ' ' + chars.slice(0, charsPerLine));
            chars = chars.slice(charsPerLine);
        }
        lines.push('M SED ' + idstr + ' ' + chars);
    });
    return lines.join('\n');
}
/**
 * Build the molfile property lines for a GEN S-group: the SAL atom list,
 * the SBL bond list and the bracket lines.
 * @param {Object} sgroup - group with atoms and bonds
 * @param {Object} mol - structure the group belongs to
 * @param {Object} sgMap - S-group id -> index written to the file
 * @param {Object} atomMap - atom id -> index written to the file
 * @param {Object} bondMap - bond id -> index written to the file
 * @returns {string} lines joined with '\n'
 */
function saveGenToMolfile(sgroup, mol, sgMap, atomMap, bondMap) { // eslint-disable-line max-params
    var idstr = (sgMap[sgroup.id] + '').padStart(3);
    var chunks = [];
    chunks.push(makeAtomBondLines('SAL', idstr, sgroup.atoms, atomMap));
    chunks.push(makeAtomBondLines('SBL', idstr, sgroup.bonds, bondMap));
    chunks.push(bracketsToMolfile(mol, sgroup, idstr));
    return Array.prototype.concat.apply([], chunks).join('\n');
}
/**
 * Format a list of atom or bond ids as property lines of at most 15 entries.
 * Each line is 'M ' + prefix + ' ' + idstr + ' ' + count, followed by the
 * mapped ids, each preceded by a space and padded to width 3.
 * @param {string} prefix - property code, e.g. 'SAL' or 'SBL'
 * @param {string} idstr - already padded S-group index
 * @param {Array} ids - ids to write; a falsy value yields no lines
 * @param {Object} map - id -> index written to the file
 * @returns {string[]} formatted lines
 */
function makeAtomBondLines(prefix, idstr, ids, map) {
    if (!ids)
        return [];
    var result = [];
    var total = ids.length;
    for (var offset = 0; offset < total; offset += 15) {
        var count = Math.min(total - offset, 15);
        var text = 'M ' + prefix + ' ' + idstr + ' ' + utils.paddedNum(count, 2);
        var k = 0;
        while (k < count) {
            text += ' ' + utils.paddedNum(map[ids[offset + k]], 3);
            k++;
        }
        result.push(text);
    }
    return result;
}
/**
 * Build the SDI (bracket display) lines for an S-group.
 * Bracket geometry is obtained from Struct.SGroup helpers; for each bracket
 * the two end points (centre -/+ half the height along the bracket normal,
 * with y complemented) are written as four width-10, 4-decimal numbers.
 * @param {Object} mol - structure the group belongs to
 * @param {Object} sg - S-group; its bracket position fields are read after
 *   Struct.SGroup.bracketPos is called on it
 * @param {string} idstr - already padded S-group index
 * @returns {string[]} one line per bracket
 */
function bracketsToMolfile(mol, sg, idstr) { // eslint-disable-line max-statements
    var innerBonds = [];
    var crossBonds = [];
    var members = Set.fromList(sg.atoms);
    Struct.SGroup.getCrossBonds(innerBonds, crossBonds, mol, members);
    Struct.SGroup.bracketPos(sg, mol, crossBonds);
    var box = sg.bracketBox;
    var dir = sg.bracketDir;
    var normal = dir.rotateSC(1, 0);
    var brackets = Struct.SGroup.getBracketParameters(mol, crossBonds, members, box, dir, normal);

    // format a single bracket as one SDI line
    function sdiLine(bracket) {
        var from = bracket.c.addScaled(bracket.n, -0.5 * bracket.h).yComplement();
        var to = bracket.c.addScaled(bracket.n, 0.5 * bracket.h).yComplement();
        var text = 'M SDI ' + idstr + utils.paddedNum(4, 3);
        [from.x, from.y, to.x, to.y].forEach(function (value) {
            text += utils.paddedNum(value, 10, 4);
        });
        return text;
    }

    var result = [];
    for (var k = 0; k < brackets.length; ++k)
        result.push(sdiLine(brackets[k]));
    return result;
}
// According to the Unicode Consortium this should be
// nlRe = /\r\n|[\n\v\f\r\x85\u2028\u2029]/g;
// http://www.unicode.org/reports/tr18/#Line_Boundaries
var nlRe = /\r\n|[\n\r]/g;

/**
 * Convert every CRLF, lone CR and lone LF line break in `str` to '\n'.
 * @param {string} str - text with mixed line endings
 * @returns {string} text using '\n' only
 */
function normalizeNewlines(str) {
    var pieces = str.split(nlRe);
    return pieces.join('\n');
}
/**
 * Cut a fixed-width line into consecutive fields.
 * @param {string} str - line to split
 * @param {number[]} parts - width of each field, in order
 * @param {boolean} withspace - when truthy, one separator character is
 *   skipped after every field
 * @returns {string[]} one substring per entry of `parts` (shorter or empty
 *   when the line runs out)
 */
function partitionLine(/* string*/ str, /* array of int*/ parts, /* bool*/ withspace) {
    /* reader */
    var fields = [];
    var cursor = 0;
    var index = 0;
    while (index < parts.length) {
        var width = parts[index];
        fields.push(str.slice(cursor, cursor + width));
        cursor += withspace ? width + 1 : width;
        index++;
    }
    return fields;
}
// Public interface of this module: the three parsers plus the two
// per-S-group-type dispatch tables used when saving.
module.exports = {
parseCTab: parseCTab,
parseMol: parseMol,
parseRxn: parseRxn,
prepareForSaving: prepareForSaving,
saveToMolfile: saveToMolfile
};

View File

@ -0,0 +1,54 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Molfile = require('./molfile');
// TODO: reconstruct the molfile string instead of parsing multiple times
// merge to bottom
/**
 * Parse molfile/rxnfile text into a structure.
 * The text is split on CRLF, CR or LF and parsed as is. When that fails and
 * `options.badHeaderRecover` is set, two repairs are tried in order: dropping
 * the first line (an extra empty line on top, common when text is pasted
 * into a dialog) and prepending an empty line (a missing first line).
 * Failures of the repair attempts are deliberately ignored.
 * @param {string} str - file contents
 * @param {Object} options - `badHeaderRecover` enables the repair attempts
 * @returns {*} result of Molfile#parseCTFile
 * @throws the error of the first (unrepaired) attempt when nothing parses
 */
function parseCTFile(str, options) {
    var parser = new Molfile();
    var lines = str.split(/\r\n|[\n\r]/g);
    var originalError;
    try {
        return parser.parseCTFile(lines);
    } catch (ex) {
        originalError = ex;
    }
    // only reached when the straightforward parse failed
    if (options.badHeaderRecover) {
        var repairs = [
            function () {
                return lines.slice(1);
            },
            function () {
                return [''].concat(lines);
            }
        ];
        for (var i = 0; i < repairs.length; ++i) {
            try {
                return parser.parseCTFile(repairs[i]());
            } catch (ignored) { //
            }
        }
    }
    throw originalError;
}
// Public interface of the molfile format module.
module.exports = {
// Serialize `struct` to molfile text. `options` is optional; its v3000,
// ignoreErrors, noRgroups and preserveIndigoDesc fields are forwarded to Molfile.
stringify: function (struct, options) {
var opts = options || {};
return new Molfile(opts.v3000).saveMolecule(struct, opts.ignoreErrors,
opts.noRgroups, opts.preserveIndigoDesc);
},
// Parse molfile text; a missing `options` is replaced by an empty object so
// parseCTFile can read options.badHeaderRecover safely.
parse: function (str, options) {
return parseCTFile(str, options || {});
}
};

View File

@ -0,0 +1,488 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var element = require('./../element');
var common = require('./common');
var utils = require('./utils');
/**
 * Molfile reader/saver state holder.
 * @param {*} [v3000] - stored as `this.v3000`; any falsy value becomes `false`
 * @constructor
 */
function Molfile(v3000) {
    /* reader */
    /* saver */
    this.molecule = null; // structure currently being written
    this.molfile = null; // output text accumulated by the write* methods
    this.v3000 = v3000 ? v3000 : false;
}
/**
 * Parse the lines of a CT file. A first line starting with '$RXN' selects
 * the reaction parser; anything else goes to the molecule parser. The
 * result then has its half-bonds, neighbours and fragments initialised.
 * @param {string[]} molfileLines - lines of the file
 * @returns {Struct} parsed and initialised structure
 */
Molfile.prototype.parseCTFile = function (molfileLines) {
    var isReaction = molfileLines[0].indexOf('$RXN') === 0;
    var struct = isReaction ?
        common.parseRxn(molfileLines) :
        common.parseMol(molfileLines);
    struct.initHalfBonds();
    struct.initNeighbors();
    struct.markFragments();
    return struct;
};
/**
 * Run the type-specific "prepare for saving" step on every S-group of
 * `this.molecule`, visiting them in reverse BFS order of the S-group forest.
 * Groups whose preparation fails with a recognised error (a thrown value
 * with a numeric `id`, and only when `skipErrors` is set) are counted and
 * queued for removal; INDIGO_*_DESC data groups are queued for removal
 * unless `preserveIndigoDesc` is set.
 * @param {boolean} skipErrors - tolerate recognised preparation errors
 * @param {boolean} preserveIndigoDesc - keep INDIGO_*_DESC groups
 * @returns {Struct} `this.molecule`
 * @throws {Error} a 'WARNING: ...' error when any group failed; note this
 *   is thrown before the queued groups are deleted
 * @throws anything thrown by the preparation step that is not tolerated
 */
Molfile.prototype.prepareSGroups = function (skipErrors, preserveIndigoDesc) {
    var mol = this.molecule;
    var doomed = [];
    var failures = 0;
    var indigoDesc = /^INDIGO_.+_DESC$/i;
    var order = this.molecule.sGroupForest.getSGroupsBFS().reverse();
    order.forEach(function (id) {
        var sgroup = mol.sgroups.get(id);
        var failed = false;
        try {
            common.prepareForSaving[sgroup.type](sgroup, mol);
        } catch (ex) {
            if (!skipErrors || typeof (ex.id) != 'number')
                throw ex;
            failed = true;
        }
        if (failed) {
            failures += 1;
            doomed.push(sgroup.id);
        } else if (!preserveIndigoDesc && indigoDesc.test(sgroup.data.fieldName)) {
            doomed.push(sgroup.id);
        }
    }, this);
    if (failures)
        throw new Error('WARNING: ' + failures + ' invalid S-groups were detected. They will be omitted.');
    doomed.forEach(function (sgid) {
        mol.sGroupDelete(sgid);
    });
    return mol;
};
/**
 * Write a clone of `molecule` as a V2000 connection table (no header lines).
 * Resets `this.molfile` before writing.
 * @param {Struct} molecule - structure to write; it is cloned first
 * @param {Object} [rgroups] - forwarded to writeCTab2000
 * @returns {string} the connection table text
 */
Molfile.prototype.getCTab = function (molecule, rgroups) {
    /* saver */
    var copy = molecule.clone();
    this.molecule = copy;
    this.molfile = '';
    this.writeCTab2000(rgroups);
    return this.molfile;
};
/**
 * Serialize `molecule` and return the resulting text.
 * Three output shapes are produced:
 *  - a structure with a reaction arrow is written as '$RXN' with one '$MOL'
 *    block per reactant/product, each saved by a nested Molfile;
 *  - a structure with r-groups (unless `norgroups` is set) is written as an
 *    '$MDL' file containing the scaffold and one '$RGP' block per r-group;
 *  - anything else is written as header lines plus a V2000 connection table.
 * @param molecule - structure to save
 * @param skipSGroupErrors - forwarded to prepareSGroups (plain-molecule path only)
 * @param norgroups - when truthy, only the scaffold of an r-group structure is saved
 * @param preserveIndigoDesc - forwarded to prepareSGroups (plain-molecule path only)
 * @returns {string} this.molfile
 * @throws {Error} for more than one reaction arrow, or a reaction with r-groups
 */
Molfile.prototype.saveMolecule = function (molecule, skipSGroupErrors, norgroups, preserveIndigoDesc) { // eslint-disable-line max-statements
/* saver */
this.reaction = molecule.rxnArrows.count() > 0;
if (molecule.rxnArrows.count() > 1)
throw new Error('Reaction may not contain more than one arrow');
// The output starts with the structure name; writeHeader() later ends this
// first line. The reaction and r-group branches overwrite this value.
this.molfile = '' + molecule.name;
if (this.reaction) {
if (molecule.rgroups.count() > 0)
throw new Error('Unable to save the structure - reactions with r-groups are not supported at the moment');
var components = molecule.getComponents();
var reactants = components.reactants;
var products = components.products;
var all = reactants.concat(products);
// reaction header followed by the reactant / product / (zero) counts line
this.molfile = '$RXN\n\n\n\n' +
utils.paddedNum(reactants.length, 3) +
utils.paddedNum(products.length, 3) +
utils.paddedNum(0, 3) + '\n';
for (var i = 0; i < all.length; ++i) {
// each component is saved by a fresh saver, with r-groups disabled and
// S-group errors not skipped
var saver = new Molfile(false);
var submol = molecule.clone(all[i], null, true);
var molfile = saver.saveMolecule(submol, false, true);
this.molfile += '$MOL\n' + molfile;
}
return this.molfile;
}
if (molecule.rgroups.count() > 0) {
if (norgroups) {
// keep only the scaffold and fall through to the plain-molecule path
molecule = molecule.getScaffold();
} else {
var scaffold = new Molfile(false).getCTab(molecule.getScaffold(), molecule.rgroups);
this.molfile = '$MDL REV 1\n$MOL\n$HDR\n\n\n\n$END HDR\n';
this.molfile += '$CTAB\n' + scaffold + '$END CTAB\n';
molecule.rgroups.each(function (rgid, rg) {
this.molfile += '$RGP\n';
this.writePaddedNumber(rgid, 3);
this.molfile += '\n';
// one connection table per fragment of this r-group
rg.frags.each(function (fnum, fid) {
var group = new Molfile(false).getCTab(molecule.getFragment(fid));
this.molfile += '$CTAB\n' + group + '$END CTAB\n';
}, this);
this.molfile += '$END RGP\n';
}, this);
this.molfile += '$END MOL\n';
return this.molfile;
}
}
// plain molecule: work on a clone so S-group preparation and default
// filling do not touch the caller's structure
this.molecule = molecule.clone();
this.prepareSGroups(skipSGroupErrors, preserveIndigoDesc);
this.writeHeader();
// TODO: saving to V3000
this.writeCTab2000();
return this.molfile;
};
/**
 * Append the remaining header lines to `this.molfile`: a line break (ending
 * whatever was already written), the program/timestamp line and an empty
 * comment line. The timestamp is month, day, two-digit year, hours and
 * minutes of the current local time, each padded to width 2.
 */
Molfile.prototype.writeHeader = function () {
    /* saver */
    var now = new Date();
    var pad2 = function (value) {
        return (value + '').padStart(2);
    };
    var stamp = pad2(now.getMonth() + 1) + pad2(now.getDate()) + pad2(now.getFullYear() % 100) +
        pad2(now.getHours()) + pad2(now.getMinutes());
    this.writeCR(); // TODO: write structure name
    this.writeWhiteSpace(2);
    this.write('Ketcher');
    this.writeWhiteSpace();
    this.writeCR(stamp + '2D 1 1.00000 0.00000 0');
    this.writeCR();
};
/**
 * Append `str` to the output text.
 * @param {*} str - value appended via string concatenation
 */
Molfile.prototype.write = function (str) {
    /* saver */
    this.molfile = this.molfile + str;
};
/**
 * Append `str` followed by a line break to the output text.
 * Called without arguments it appends just the line break.
 * @param {*} [str] - text of the line
 */
Molfile.prototype.writeCR = function (str) {
    /* saver */
    var text = arguments.length == 0 ? '' : str;
    this.molfile += text + '\n';
};
/**
 * Append `length` spaces to the output text (one space when called without
 * arguments). Negative lengths write nothing.
 * @param {number} [length] - number of spaces
 */
Molfile.prototype.writeWhiteSpace = function (length) {
    /* saver */
    var count = arguments.length == 0 ? 1 : length;
    var blanks = ' '.repeat(Math.max(count, 0));
    this.write(blanks);
};
/**
 * Append `str` left-aligned in a field of `width` characters; text longer
 * than the field is written in full without padding.
 * @param {string} str - text to write
 * @param {number} width - field width
 */
Molfile.prototype.writePadded = function (str, width) {
    /* saver */
    this.write(str);
    var padding = width - str.length;
    this.writeWhiteSpace(padding);
};
/**
 * Append `number` right-aligned in a field of `width` characters.
 * The value is coerced to a number first, so numeric strings are accepted.
 * @param {number|string} number - value to write
 * @param {number} width - field width
 */
Molfile.prototype.writePaddedNumber = function (number, width) {
    /* saver */
    var digits = String(number - 0);
    var padding = width - digits.length;
    this.writeWhiteSpace(padding);
    this.write(digits);
};
/**
 * Append `number` formatted by utils.paddedNum with the given field width
 * and number of decimals.
 * @param {number} number - value to write
 * @param {number} width - field width
 * @param {number} precision - digits after the decimal point
 */
Molfile.prototype.writePaddedFloat = function (number, width, precision) {
    /* saver */
    var formatted = utils.paddedNum(number, width, precision);
    this.write(formatted);
};
/**
 * Append the V2000 counts line for `this.molecule`: atom count, bond count,
 * a zero field, three blanks, the chiral flag (1 or 0), another zero field,
 * twelve blanks, 999 and the ' V2000' tag. Numeric fields are width 3.
 */
Molfile.prototype.writeCTab2000Header = function () {
    /* saver */
    var mol = this.molecule;
    this.writePaddedNumber(mol.atoms.count(), 3);
    this.writePaddedNumber(mol.bonds.count(), 3);
    this.writePaddedNumber(0, 3);
    this.writeWhiteSpace(3);
    var chiralFlag = mol.isChiral ? 1 : 0;
    this.writePaddedNumber(chiralFlag, 3);
    this.writePaddedNumber(0, 3);
    this.writeWhiteSpace(12);
    this.writePaddedNumber(999, 3);
    this.writeCR(' V2000');
};
/**
 * Append the V2000 connection table of this.molecule to this.molfile.
 * Written in this order: counts line, one line per atom, one line per bond,
 * 'A' atom property lines, M-property lists (charge, isotope, radical,
 * r-group label, r-group logic, attachment point, ring bond count,
 * substitution count, unsaturation), atom lists, the S-group lines and
 * finally the end marker.
 * Side effects: sets this.mapping (atom id -> 1-based position) and
 * this.bondMapping (bond id -> 1-based position), and fills undefined
 * numeric atom/bond fields of this.molecule with 0.
 * @param rgroups - optional collection with `each`; when given, r-group
 *   logic lines are written for groups that have resth, ifthen or range set
 */
Molfile.prototype.writeCTab2000 = function (rgroups) { // eslint-disable-line max-statements
/* saver */
this.writeCTab2000Header();
this.mapping = {};
var i = 1;
/* eslint-disable camelcase*/
// ids of atoms carrying an atom list, and {id, value} records for atoms
// that need an extra 'A' property line
var atomList_list = [];
var atomProps_list = [];
/* eslint-enable camel-case*/
this.molecule.atoms.each(function (id, atom) { // eslint-disable-line max-statements
// coordinates; y is negated on output
this.writePaddedFloat(atom.pp.x, 10, 4);
this.writePaddedFloat(-atom.pp.y, 10, 4);
this.writePaddedFloat(atom.pp.z, 10, 4);
this.writeWhiteSpace();
// choose the label written into the 3-character label field
var label = atom.label;
if (atom.atomList != null) {
label = 'L';
atomList_list.push(id);
} else if (atom['pseudo']) {
// pseudo labels longer than the field are written as 'A' and the quoted
// text is deferred to a property line
if (atom['pseudo'].length > 3) {
label = 'A';
atomProps_list.push({ id: id, value: "'" + atom['pseudo'] + "'" });
}
} else if (atom['alias']) {
atomProps_list.push({ id: id, value: atom['alias'] });
} else if (!element.map[label] && ['A', 'Q', 'X', '*', 'R#'].indexOf(label) == -1) { // search in generics?
// label is neither a known element nor one of the listed special labels:
// write 'C' and keep the real label in a property line
label = 'C';
atomProps_list.push({ id: id, value: atom.label });
}
this.writePadded(label, 3);
this.writePaddedNumber(0, 2);
this.writePaddedNumber(0, 3);
this.writePaddedNumber(0, 3);
// undefined numeric fields are defaulted to 0 on the atom itself
if (typeof atom.hCount === "undefined")
atom.hCount = 0;
this.writePaddedNumber(atom.hCount, 3);
if (typeof atom.stereoCare === "undefined")
atom.stereoCare = 0;
this.writePaddedNumber(atom.stereoCare, 3);
// explicit valence: negative -> 0, zero -> 15, anything else as is
this.writePaddedNumber(atom.explicitValence < 0 ? 0 : (atom.explicitValence == 0 ? 15 : atom.explicitValence), 3); // eslint-disable-line no-nested-ternary
this.writePaddedNumber(0, 3);
this.writePaddedNumber(0, 3);
this.writePaddedNumber(0, 3);
if (typeof atom.aam === "undefined")
atom.aam = 0;
this.writePaddedNumber(atom.aam, 3);
if (typeof atom.invRet === "undefined")
atom.invRet = 0;
this.writePaddedNumber(atom.invRet, 3);
if (typeof atom.exactChangeFlag === "undefined")
atom.exactChangeFlag = 0;
this.writePaddedNumber(atom.exactChangeFlag, 3);
this.writeCR();
// remember the 1-based position of this atom in the file
this.mapping[id] = i;
i++;
}, this);
this.bondMapping = {};
i = 1;
this.molecule.bonds.each(function (id, bond) {
this.bondMapping[id] = i++;
// bond ends are written as file positions, not internal ids
this.writePaddedNumber(this.mapping[bond.begin], 3);
this.writePaddedNumber(this.mapping[bond.end], 3);
this.writePaddedNumber(bond.type, 3);
if (typeof bond.stereo === "undefined")
bond.stereo = 0;
this.writePaddedNumber(bond.stereo, 3);
this.writePadded(bond.xxx, 3);
if (typeof bond.topology === "undefined")
bond.topology = 0;
this.writePaddedNumber(bond.topology, 3);
if (typeof bond.reactingCenterStatus === "undefined")
bond.reactingCenterStatus = 0;
this.writePaddedNumber(bond.reactingCenterStatus, 3);
this.writeCR();
}, this);
// deferred atom text: an 'A' line with the atom number, then the text line.
// NOTE(review): the number is id + 1 rather than this.mapping[id]; the two
// agree only if atom ids are contiguous from 0 - confirm for cloned structs.
while (atomProps_list.length > 0) {
this.write('A ');
this.writePaddedNumber(atomProps_list[0].id + 1, 3);
this.writeCR();
this.writeCR(atomProps_list[0].value);
atomProps_list.splice(0, 1);
}
// [atom id, value] pairs collected per property kind
var chargeList = [];
var isotopeList = [];
var radicalList = [];
var rglabelList = [];
var rglogicList = [];
var aplabelList = [];
var rbcountList = [];
var unsaturatedList = [];
var substcountList = [];
this.molecule.atoms.each(function (id, atom) {
if (atom.charge != 0)
chargeList.push([id, atom.charge]);
if (atom.isotope != 0)
isotopeList.push([id, atom.isotope]);
if (atom.radical != 0)
radicalList.push([id, atom.radical]);
if (atom.rglabel != null && atom.label == 'R#') { // TODO need to force rglabel=null when label is not 'R#'
// rglabel is a bit mask: bit n set -> one entry with value n + 1
for (var rgi = 0; rgi < 32; rgi++)
if (atom.rglabel & (1 << rgi)) rglabelList.push([id, rgi + 1]);
}
if (atom.attpnt != null)
aplabelList.push([id, atom.attpnt]);
if (atom.ringBondCount != 0)
rbcountList.push([id, atom.ringBondCount]);
if (atom.substitutionCount != 0)
substcountList.push([id, atom.substitutionCount]);
if (atom.unsaturatedAtom != 0)
unsaturatedList.push([id, atom.unsaturatedAtom]);
});
if (rgroups) {
rgroups.each(function (rgid, rg) {
// only r-groups with some logic set get a line
if (rg.resth || rg.ifthen > 0 || rg.range.length > 0) {
var line = ' 1 ' +
utils.paddedNum(rgid, 3) + ' ' +
utils.paddedNum(rg.ifthen, 3) + ' ' +
utils.paddedNum(rg.resth ? 1 : 0, 3) + ' ' + rg.range;
rglogicList.push(line);
}
});
}
// Writes `values` as property lines of at most 8 entries each: the property
// code, the entry count, then (atom position, value) pairs. Consumes
// `values`. Must be invoked with `this` bound to the saver.
function writeAtomPropList(propId, values) {
while (values.length > 0) {
var part = [];
while (values.length > 0 && part.length < 8) {
part.push(values[0]);
values.splice(0, 1);
}
this.write(propId);
this.writePaddedNumber(part.length, 3);
part.forEach(function (value) {
this.writeWhiteSpace();
this.writePaddedNumber(this.mapping[value[0]], 3);
this.writeWhiteSpace();
this.writePaddedNumber(value[1], 3);
}, this);
this.writeCR();
}
}
writeAtomPropList.call(this, 'M CHG', chargeList);
writeAtomPropList.call(this, 'M ISO', isotopeList);
writeAtomPropList.call(this, 'M RAD', radicalList);
writeAtomPropList.call(this, 'M RGP', rglabelList);
for (var j = 0; j < rglogicList.length; ++j)
this.write('M LOG' + rglogicList[j] + '\n');
writeAtomPropList.call(this, 'M APO', aplabelList);
writeAtomPropList.call(this, 'M RBC', rbcountList);
writeAtomPropList.call(this, 'M SUB', substcountList);
writeAtomPropList.call(this, 'M UNS', unsaturatedList);
// atom lists: atom number, entry count, 'T'/'F' for the notList flag, labels.
// NOTE(review): as above, aid + 1 is used instead of this.mapping[aid].
if (atomList_list.length > 0) {
for (j = 0; j < atomList_list.length; ++j) {
var aid = atomList_list[j];
var atomList = this.molecule.atoms.get(aid).atomList;
this.write('M ALS');
this.writePaddedNumber(aid + 1, 4);
this.writePaddedNumber(atomList.ids.length, 3);
this.writeWhiteSpace();
this.write(atomList.notList ? 'T' : 'F');
var labelList = atomList.labelList();
for (var k = 0; k < labelList.length; ++k) {
this.writeWhiteSpace();
this.writePadded(labelList[k], 3);
}
this.writeCR();
}
}
// number the S-groups 1..n in BFS order of the S-group forest;
// sgmap: id -> number, sgmapback: number -> id
var sgmap = {};
var cnt = 1;
var sgmapback = {};
var sgorder = this.molecule.sGroupForest.getSGroupsBFS();
sgorder.forEach(function (id) {
sgmapback[cnt] = id;
sgmap[id] = cnt++;
}, this);
for (var q = 1; q < cnt; ++q) { // each group on its own
var id = sgmapback[q];
var sgroup = this.molecule.sgroups.get(id);
// group type
this.write('M STY');
this.writePaddedNumber(1, 3);
this.writeWhiteSpace(1);
this.writePaddedNumber(q, 3);
this.writeWhiteSpace(1);
this.writePadded(sgroup.type, 3);
this.writeCR();
// TODO: write subtype, M SST
// group label: the group's own number is written as its label
this.write('M SLB');
this.writePaddedNumber(1, 3);
this.writeWhiteSpace(1);
this.writePaddedNumber(q, 3);
this.writeWhiteSpace(1);
this.writePaddedNumber(q, 3);
this.writeCR();
// parent link, only when the forest reports a parent id >= 0
var parentid = this.molecule.sGroupForest.parent.get(id);
if (parentid >= 0) {
this.write('M SPL');
this.writePaddedNumber(1, 3);
this.writeWhiteSpace(1);
this.writePaddedNumber(q, 3);
this.writeWhiteSpace(1);
this.writePaddedNumber(sgmap[parentid], 3);
this.writeCR();
}
// connectivity
if (sgroup.type == 'SRU' && sgroup.data.connectivity) {
var connectivity = '';
connectivity += ' ';
connectivity += q.toString().padStart(3);
connectivity += ' ';
connectivity += (sgroup.data.connectivity || '').padEnd(3);
this.write('M SCN');
this.writePaddedNumber(1, 3);
this.write(connectivity.toUpperCase());
this.writeCR();
}
// SRU subscript, 'n' when none is set
if (sgroup.type == 'SRU') {
this.write('M SMT ');
this.writePaddedNumber(q, 3);
this.writeWhiteSpace();
this.write(sgroup.data.subscript || 'n');
this.writeCR();
}
// type-specific lines come from the shared dispatch table
this.writeCR(common.saveToMolfile[sgroup.type](sgroup, this.molecule, sgmap, this.mapping, this.bondMapping));
}
// TODO: write M APO
// TODO: write M AAL
// TODO: write M RGP
// TODO: write M LOG
this.writeCR('M END');
};
module.exports = Molfile;

View File

@ -0,0 +1,301 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Set = require('../../util/set');
var Vec2 = require('../../util/vec2');
var Struct = require('./../struct/index');
var utils = require('./utils');
/**
 * Read a property line of the form "count, key, value, key, value, ..."
 * (fixed fields of width 3) into an object.
 * Keys are converted to zero-based numbers (file value minus one).
 * @param {string} str - property data
 * @param {boolean} valueString - truthy: keep values as trimmed strings;
 *   falsy: parse values as decimal integers
 * @returns {Object} zero-based key -> value
 */
function readKeyValuePairs(str, /* bool */ valueString) {
    /* reader */
    var fields = utils.partitionLineFixed(str, 3, true);
    var pairCount = utils.parseDecimalInt(fields[0]);
    var result = {};
    // keys sit at odd field positions, their values right after them
    for (var n = 0, at = 1; n < pairCount; ++n, at += 2) {
        var key = utils.parseDecimalInt(fields[at]) - 1;
        var raw = fields[at + 1];
        result[key] = valueString ? raw.trim() : utils.parseDecimalInt(raw);
    }
    return result;
}
/**
 * Like readKeyValuePairs, but returns the pairs as a list so a key may
 * occur more than once.
 * @param {string} str - property data: count, then key/value fields of width 3
 * @param {boolean} valueString - truthy: keep values as trimmed strings;
 *   falsy: parse values as decimal integers
 * @returns {Array[]} list of [zero-based key, value]
 */
function readKeyMultiValuePairs(str, /* bool */ valueString) {
    /* reader */
    var fields = utils.partitionLineFixed(str, 3, true);
    var pairCount = utils.parseDecimalInt(fields[0]);
    var pairs = [];
    var at = 1;
    while (pairs.length < pairCount) {
        var key = utils.parseDecimalInt(fields[at]) - 1;
        var raw = fields[at + 1];
        var value = valueString ? raw.trim() : utils.parseDecimalInt(raw);
        pairs.push([key, value]);
        at += 2;
    }
    return pairs;
}
/**
 * Post-process a freshly loaded MUL (multiple) S-group: collapse the
 * repeated copies listed in sgroup.atoms onto the parent atoms listed in
 * sgroup.patoms. Copy atoms are removed from `mol`, bonds between merged
 * atoms are removed, and bonds with one merged end are re-pointed at the
 * corresponding parent atom. Afterwards sgroup.atoms holds the parent atoms
 * and sgroup.patoms is null.
 * @param sgroup - the MUL group; data.subscript supplies the multiplier
 * @param mol - structure being loaded; atoms and bonds are modified
 * @param atomMap - passed to Struct.SGroup.filterAtoms; entries of removed
 *   atoms are set to -1
 * @throws {Error} 'parent atom missing' when a copy atom exists but its
 *   parent atom id is negative
 */
function postLoadMul(sgroup, mol, atomMap) { // eslint-disable-line max-statements
// the multiplier arrives as text in the subscript; "- 0" coerces it to a number
sgroup.data.mul = sgroup.data.subscript - 0;
// copy atom id -> parent atom id it is merged into
var atomReductionMap = {};
sgroup.atoms = Struct.SGroup.filterAtoms(sgroup.atoms, atomMap);
sgroup.patoms = Struct.SGroup.filterAtoms(sgroup.patoms, atomMap);
// mark repetitions for removal
// (k starts at 1: the first block of sgroup.atoms is not merged)
for (var k = 1; k < sgroup.data.mul; ++k) {
for (var m = 0; m < sgroup.patoms.length; ++m) {
var raid = sgroup.atoms[k * sgroup.patoms.length + m]; // eslint-disable-line no-mixed-operators
if (raid < 0)
continue; // eslint-disable-line no-continue
if (sgroup.patoms[m] < 0)
throw new Error('parent atom missing');
atomReductionMap[raid] = sgroup.patoms[m]; // "merge" atom in parent
}
}
sgroup.patoms = Struct.SGroup.removeNegative(sgroup.patoms);
// lookup of the remaining parent atom ids
var patomsMap = identityMap(sgroup.patoms);
var bondsToRemove = [];
mol.bonds.each(function (bid, bond) {
var beginIn = bond.begin in atomReductionMap;
var endIn = bond.end in atomReductionMap;
// if both adjacent atoms of a bond are to be merged, remove it
// (also removed: bonds between a merged atom and a parent atom)
/* eslint-disable no-mixed-operators*/
if (beginIn && endIn ||
beginIn && bond.end in patomsMap ||
endIn && bond.begin in patomsMap)
bondsToRemove.push(bid);
/* eslint-enable no-mixed-operators*/
// if just one atom is merged, modify the bond accordingly
else if (beginIn)
bond.begin = atomReductionMap[bond.begin];
else if (endIn)
bond.end = atomReductionMap[bond.end];
}, sgroup);
// apply removal lists
for (var b = 0; b < bondsToRemove.length; ++b)
mol.bonds.remove(bondsToRemove[b]);
// NOTE(review): `a` is a string key here (for...in); confirm that
// mol.atoms.remove accepts string ids.
for (var a in atomReductionMap) {
mol.atoms.remove(a);
atomMap[a] = -1;
}
sgroup.atoms = sgroup.patoms;
sgroup.patoms = null;
}
/**
 * Post-process a loaded SRU S-group: normalise data.connectivity to a
 * trimmed lower-case string, using 'EU' when none was read.
 * @param {Object} sgroup - group whose data.connectivity is rewritten
 */
function postLoadSru(sgroup) {
    var data = sgroup.data;
    var raw = data.connectivity || 'EU';
    data.connectivity = raw.trim().toLowerCase();
}
/**
 * Post-process a loaded SUP S-group: the text read into data.subscript is
 * the group's name, so move it (trimmed) to data.name and clear the subscript.
 * @param {Object} sgroup - group whose data.name / data.subscript are rewritten
 */
function postLoadSup(sgroup) {
    var data = sgroup.data;
    var label = data.subscript || '';
    data.name = label.trim();
    data.subscript = '';
}
// GEN S-groups need no post-processing after loading; this no-op exists so
// loadSGroup's dispatch map has an entry for every supported type.
function postLoadGen(sgroup, mol, atomMap) { // eslint-disable-line no-unused-vars
}
/**
 * Post-process a loaded DAT S-group: a position that is not absolute was
 * read as an offset, so shift it by the mass centre of the group's atoms.
 * Absolute positions are left untouched.
 * @param {Object} sgroup - group with data.absolute, pp and atoms
 * @param {Object} mol - structure used to compute the mass centre
 */
function postLoadDat(sgroup, mol) {
    if (sgroup.data.absolute)
        return;
    var centre = Struct.SGroup.getMassCentre(mol, sgroup.atoms);
    sgroup.pp = sgroup.pp.add(centre);
}
/**
 * Register a parsed S-group with the structure.
 * Adds it to mol.sgroups, runs the type-specific post-load step, records
 * the group id on every member atom that exists in `mol`, and inserts the
 * group into the S-group forest (DAT groups are inserted with parent -1 and
 * no children; other types let the forest decide).
 * @param {Object} mol - structure being loaded
 * @param {Object} sg - S-group to add; receives its `id`
 * @param {Object} atomMap - forwarded to the post-load step
 * @returns {number} id assigned to the group
 */
function loadSGroup(mol, sg, atomMap) {
    var postLoadMap = {
        MUL: postLoadMul,
        SRU: postLoadSru,
        SUP: postLoadSup,
        DAT: postLoadDat,
        GEN: postLoadGen
    };
    // add the group to the molecule
    sg.id = mol.sgroups.add(sg);
    // apply type-specific post-processing
    postLoadMap[sg.type](sg, mol, atomMap);
    // mark atoms in the group as belonging to it
    var index = 0;
    while (index < sg.atoms.length) {
        var atomId = sg.atoms[index];
        if (mol.atoms.has(atomId))
            Set.add(mol.atoms.get(atomId).sgs, sg.id);
        index++;
    }
    var forest = mol.sGroupForest;
    if (sg.type == 'DAT')
        forest.insert(sg.id, -1, []);
    else
        forest.insert(sg.id);
    return sg.id;
}
/**
 * Create an S-group object for every "index type" pair of a type property
 * line and store it in `sGroups` under its index.
 * @param {Object} sGroups - index -> S-group collection to fill
 * @param {string} propData - property data with index/type pairs
 * @throws {Error} 'Unsupported S-group type' for a type that is not a key
 *   of Struct.SGroup.TYPES
 */
function initSGroup(sGroups, propData) {
    /* reader */
    var typeByIndex = readKeyValuePairs(propData, true);
    Object.keys(typeByIndex).forEach(function (key) {
        var type = typeByIndex[key];
        var supported = type in Struct.SGroup.TYPES;
        if (!supported)
            throw new Error('Unsupported S-group type');
        var created = new Struct.SGroup(type);
        created.number = key;
        sGroups[key] = created;
    });
}
/**
 * Apply a per-group property line to the S-groups it names.
 * @param {Object} sGroups - index -> S-group
 * @param {string} propName - property to set
 * @param {string} propData - property data with index/value pairs
 * @param {boolean} numeric - truthy: values are parsed as integers;
 *   falsy: values are kept as trimmed strings
 * @param {boolean} core - truthy: set the property on the S-group itself;
 *   falsy: set it on the S-group's `data` object
 */
function applySGroupProp(sGroups, propName, propData, numeric, core) { // eslint-disable-line max-params
    var valueByIndex = readKeyValuePairs(propData, !(numeric));
    Object.keys(valueByIndex).forEach(function (key) {
        // "core" properties are stored directly in an sgroup, not in sgroup.data
        var target = core ? sGroups[key] : sGroups[key].data;
        target[propName] = valueByIndex[key];
    });
}
/**
 * Append the integer list of a property line to an array property of one
 * S-group. The line holds the group index (characters 1-3), the declared
 * entry count (characters 4-7) and then the entries in fields of width 3.
 * @param {Object} sGroups - index -> S-group
 * @param {string} propName - array property to extend
 * @param {string} propData - property data
 * @param {number} [shift] - when truthy, added to every entry
 * @throws {Error} 'File format invalid' when the number of entries read
 *   differs from the declared count
 */
function applySGroupArrayProp(sGroups, propName, propData, shift) {
    /* reader */
    var groupIndex = utils.parseDecimalInt(propData.slice(1, 4)) - 1;
    var declared = utils.parseDecimalInt(propData.slice(4, 8));
    var entries = toIntArray(utils.partitionLineFixed(propData.slice(8), 3, true));
    if (entries.length != declared)
        throw new Error('File format invalid');
    var shifted = entries;
    if (shift) {
        shifted = entries.map(function (entry) {
            return entry + shift;
        });
    }
    var group = sGroups[groupIndex];
    group[propName] = group[propName].concat(shifted);
}
/**
 * Store the field name of a data S-group.
 * @param {Object} sg - S-group whose data.fieldName is set
 * @param {string} name - field name
 */
function applyDataSGroupName(sg, name) {
    /* reader */
    var data = sg.data;
    data.fieldName = name;
}
/**
 * Store the query code of a data S-group.
 * @param {Object} sg - S-group whose data.query is set
 * @param {string} query - query code
 */
function applyDataSGroupQuery(sg, query) {
    /* reader */
    var data = sg.data;
    data.query = query;
}
/**
 * Store the query operator of a data S-group.
 * @param {Object} sg - S-group whose data.queryOp is set
 * @param {string} queryOp - query operator
 */
function applyDataSGroupQueryOp(sg, queryOp) {
    /* reader */
    var data = sg.data;
    data.queryOp = queryOp;
}
/**
 * Apply a data-field description line to the S-group it names.
 * The line is cut into fields of widths 4, 31, 2, 20, 2 and 3: group index,
 * field name, field type, units, query and query operator. Every text field
 * is trimmed before it is stored in the group's data.
 * @param {Object} sGroups - index -> S-group
 * @param {string} propData - property data
 */
function applyDataSGroupDesc(sGroups, propData) {
    /* reader */
    var fields = utils.partitionLine(propData, [4, 31, 2, 20, 2, 3], false);
    var groupIndex = utils.parseDecimalInt(fields[0]) - 1;
    var text = fields.slice(1).map(function (field) {
        return field.trim();
    });
    var data = sGroups[groupIndex].data;
    data.fieldType = text[1];
    data.fieldName = text[0];
    data.units = text[2];
    data.query = text[3];
    data.queryOp = text[4];
}
/**
 * Apply the display information of a data S-group.
 * `propData` is cut into fixed-width fields (x, y, eee, f, g, h, i, jjj,
 * kkk, ll, m, n, oo). Stored results: the position `sg.pp` (y negated) and,
 * in `sg.data`, attached (f is 'A'), absolute (g is 'A'), showUnits (h is
 * 'U'), nCharsToDisplay (jjj; 'ALL' becomes -1), tagChar (m) and daspPos (n).
 * @param {Object} sg - data S-group to update
 * @param {string} propData - display information text
 */
function applyDataSGroupInfo(sg, propData) { // eslint-disable-line max-statements
    /* reader */
    var split = utils.partitionLine(propData, [10/* x.x*/, 10/* y.y*/, 4/* eee*/, 1/* f*/, 1/* g*/, 1/* h*/, 3/* i */, 3/* jjj*/, 3/* kkk*/, 3/* ll*/, 2/* m*/, 3/* n*/, 2/* oo*/], false);
    var field = function (index) {
        return split[index].trim();
    };
    var x = parseFloat(split[0]);
    var y = parseFloat(split[1]);
    var flags = {
        attached: field(3) == 'A',
        absolute: field(4) == 'A',
        showUnits: field(5) == 'U'
    };
    var limitText = field(7);
    var limit = limitText == 'ALL' ? -1 : utils.parseDecimalInt(limitText);
    var tag = field(10);
    var dasp = utils.parseDecimalInt(field(11));
    sg.pp = new Vec2(x, -y);
    var data = sg.data;
    data.attached = flags.attached;
    data.absolute = flags.absolute;
    data.showUnits = flags.showUnits;
    data.nCharsToDisplay = limit;
    data.tagChar = tag;
    data.daspPos = dasp;
}
/**
 * Apply a display-information line: the first four characters give the
 * group index, and everything from the sixth character on is handed to
 * applyDataSGroupInfo for that group.
 * @param {Object} sGroups - index -> S-group
 * @param {string} propData - property data
 */
function applyDataSGroupInfoLine(sGroups, propData) {
    /* reader */
    var groupIndex = utils.parseDecimalInt(propData.slice(0, 4)) - 1;
    var payload = propData.slice(5);
    applyDataSGroupInfo(sGroups[groupIndex], payload);
}
/**
 * Append a chunk of text to the field value of a data S-group.
 * On the final chunk the accumulated value is right-trimmed and, when it
 * both starts and ends with a double quote, the surrounding quotes are
 * stripped.
 * @param {Object} sg - data S-group
 * @param {string} data - chunk to append
 * @param {boolean} finalize - truthy for the last chunk of the value
 */
function applyDataSGroupData(sg, data, finalize) {
    /* reader */
    var store = sg.data;
    store.fieldValue = (store.fieldValue || '') + data;
    if (!finalize)
        return;
    store.fieldValue = trimRight(store.fieldValue);
    var value = store.fieldValue;
    if (value.startsWith('"') && value.endsWith('"'))
        store.fieldValue = value.slice(1, -1);
}
/**
 * Apply a data-value line: the first five characters give the group index
 * and the rest of the line is appended to that group's field value.
 * @param {Object} sGroups - index -> S-group
 * @param {string} propData - property data
 * @param {boolean} finalize - truthy when this is the last line of the value
 */
function applyDataSGroupDataLine(sGroups, propData, finalize) {
    /* reader */
    var groupIndex = utils.parseDecimalInt(propData.slice(0, 5)) - 1;
    var chunk = propData.slice(5);
    applyDataSGroupData(sGroups[groupIndex], chunk, finalize);
}
// Utility functions
/**
 * Convert every entry of `strArray` with utils.parseDecimalInt.
 *
 * @param {string[]} strArray - values to convert
 * @returns {number[]} new array of the same length
 */
function toIntArray(strArray) {
	/* reader */
	var ints = [];
	var idx = 0;
	while (idx < strArray.length) {
		ints.push(utils.parseDecimalInt(strArray[idx]));
		idx += 1;
	}
	return ints;
}
/**
 * Remove trailing whitespace from `str`; leading whitespace is kept.
 *
 * @param {string} str
 * @returns {string}
 */
function trimRight(str) {
	var trailingWhitespace = /\s+$/;
	return str.replace(trailingWhitespace, '');
}
/**
 * Build a plain object that maps every element of `array` to itself.
 *
 * @param {Array} array
 * @returns {Object} object with one property per distinct element
 */
function identityMap(array) {
	var result = {};
	var idx = 0;
	while (idx < array.length) {
		var item = array[idx];
		result[item] = item;
		idx += 1;
	}
	return result;
}
// Public API of this module: key/value readers, S-group construction helpers
// and the data S-group appliers. Each entry re-exports the function of the
// same name defined in this file.
module.exports = {
readKeyValuePairs: readKeyValuePairs,
readKeyMultiValuePairs: readKeyMultiValuePairs,
loadSGroup: loadSGroup,
initSGroup: initSGroup,
applySGroupProp: applySGroupProp,
applySGroupArrayProp: applySGroupArrayProp,
applyDataSGroupName: applyDataSGroupName,
applyDataSGroupQuery: applyDataSGroupQuery,
applyDataSGroupQueryOp: applyDataSGroupQueryOp,
applyDataSGroupDesc: applyDataSGroupDesc,
applyDataSGroupInfo: applyDataSGroupInfo,
applyDataSGroupData: applyDataSGroupData,
applyDataSGroupInfoLine: applyDataSGroupInfoLine,
applyDataSGroupDataLine: applyDataSGroupDataLine
};

View File

@ -0,0 +1,274 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Vec2 = require('../../util/vec2');
var Struct = require('./../struct/index');
/**
 * Format `number` with a fixed number of fraction digits and left-pad it
 * with spaces to `width` characters.
 *
 * @param {number} number - value to format
 * @param {number} width - target field width
 * @param {number} [precision] - fraction digits; a falsy value means 0
 * @returns {string} the padded text
 * @throws {Error} when the formatted number is longer than `width`
 */
function paddedNum(number, width, precision) {
	var fractionDigits = precision || 0;
	var fixed = number.toFixed(fractionDigits);
	var numStr = fixed.replace(',', '.'); // Really need to replace?
	if (!(numStr.length > width))
		return numStr.padStart(width);
	throw new Error('number does not fit');
}
/**
 * Parse `str` as a base-10 integer, mapping unparsable input to 0.
 *
 * @param {string} str
 * @returns {number} the parsed integer, or 0 when parseInt yields NaN
 */
function parseDecimalInt(str) {
	/* reader */
	var parsed = parseInt(str, 10);
	if (isNaN(parsed))
		return 0;
	return parsed;
}
/**
 * Cut `str` into consecutive fixed-width pieces.
 *
 * @param {string} str - line to cut
 * @param {number[]} parts - width of every piece, in order
 * @param {boolean} withspace - when truthy, one extra character is skipped
 *   after every piece
 * @returns {string[]} one entry per width; pieces past the end of `str`
 *   come back shorter or empty
 */
function partitionLine(/* string*/ str, /* array of int*/ parts, /* bool*/ withspace) {
	/* reader */
	var pieces = [];
	var offset = 0;
	for (var k = 0; k < parts.length; ++k) {
		var width = parts[k];
		var piece = str.slice(offset, offset + width);
		pieces.push(piece);
		if (withspace)
			++offset;
		offset += width;
	}
	return pieces;
}
/**
 * Cut `str` into pieces of equal width until the string is exhausted.
 *
 * @param {string} str - line to cut
 * @param {number} itemLength - width of every piece
 * @param {boolean} withspace - when truthy, one extra character is skipped
 *   after every piece
 * @returns {string[]} the pieces; the last one may be shorter
 */
function partitionLineFixed(/* string*/ str, /* int*/ itemLength, /* bool*/ withspace) {
	/* reader */
	var pieces = [];
	var offset = 0;
	while (offset < str.length) {
		pieces.push(str.slice(offset, offset + itemLength));
		if (withspace)
			offset++;
		offset += itemLength;
	}
	return pieces;
}
// Lookup tables and fixed-width column layouts shared by the molfile readers.
var fmtInfo = {
// bond type code read from the file -> Struct.Bond type constant
bondTypeMap: {
1: Struct.Bond.PATTERN.TYPE.SINGLE,
2: Struct.Bond.PATTERN.TYPE.DOUBLE,
3: Struct.Bond.PATTERN.TYPE.TRIPLE,
4: Struct.Bond.PATTERN.TYPE.AROMATIC,
5: Struct.Bond.PATTERN.TYPE.SINGLE_OR_DOUBLE,
6: Struct.Bond.PATTERN.TYPE.SINGLE_OR_AROMATIC,
7: Struct.Bond.PATTERN.TYPE.DOUBLE_OR_AROMATIC,
8: Struct.Bond.PATTERN.TYPE.ANY
},
// stereo code of a V2000 bond line -> Struct.Bond stereo constant
bondStereoMap: {
0: Struct.Bond.PATTERN.STEREO.NONE,
1: Struct.Bond.PATTERN.STEREO.UP,
4: Struct.Bond.PATTERN.STEREO.EITHER,
6: Struct.Bond.PATTERN.STEREO.DOWN,
3: Struct.Bond.PATTERN.STEREO.CIS_TRANS
},
// CFG value of a V3000 bond line -> Struct.Bond stereo constant
v30bondStereoMap: {
0: Struct.Bond.PATTERN.STEREO.NONE,
1: Struct.Bond.PATTERN.STEREO.UP,
2: Struct.Bond.PATTERN.STEREO.EITHER,
3: Struct.Bond.PATTERN.STEREO.DOWN
},
// topology code -> Struct.Bond topology constant
bondTopologyMap: {
0: Struct.Bond.PATTERN.TOPOLOGY.EITHER,
1: Struct.Bond.PATTERN.TOPOLOGY.RING,
2: Struct.Bond.PATTERN.TOPOLOGY.CHAIN
},
// column widths handed to partitionLine for the respective V2000 line kinds
countsLinePartition: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6],
atomLinePartition: [10, 10, 10, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
bondLinePartition: [3, 3, 3, 3, 3, 3, 3],
atomListHeaderPartition: [3, 1, 1, 4, 1, 1],
atomListHeaderLength: 11, // = atomListHeaderPartition.reduce(function(a,b) { return a + b; }, 0)
atomListHeaderItemLength: 4,
// indexed by the integer parsed from the charge column of a V2000 atom line
chargeMap: [0, +3, +2, +1, 0, -1, -2, -3],
// indexed by the integer parsed from the valence column of a V2000 atom line
valenceMap: [undefined, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0],
// NOTE(review): no reader visible in this file set uses this table - confirm it is still needed
implicitHydrogenMap: [undefined, 0, 1, 2, 3, 4],
// V3000 atom property keyword -> name of the Struct.Atom parameter it sets
v30atomPropMap: {
CHG: 'charge',
RAD: 'radical',
MASS: 'isotope',
VAL: 'explicitValence',
HCOUNT: 'hCount',
INVRET: 'invRet',
SUBST: 'substitutionCount',
UNSAT: 'unsaturatedAtom',
RBCNT: 'ringBondCount'
},
// column widths of the counts line of a V2000 reaction file
rxnItemsPartition: [3, 3, 3]
};
// Role of a molecule inside a reaction; rxnMerge stores the value on every
// atom of the molecule as `rxnFragmentType`.
var FRAGMENT = {
NONE: 0,
REACTANT: 1,
PRODUCT: 2,
AGENT: 3
};
// When true, rxnMerge positions the molecules itself (reactants, agents,
// products from left to right); when false they are merged where they are.
var SHOULD_REACTION_FRAGMENT_RELAYOUT = true;
// When true, rxnMerge passes the reciprocal of the average bond length of all
// molecules to mol.scale() before laying them out.
var SHOULD_RESCALE_MOLECULES = true;
/**
 * Merge the molecules of a reaction into a single Struct, adding the plus
 * signs between neighbouring reactants/products and the reaction arrow.
 *
 * Molecules are classified by their position in `mols`: the first
 * `nReactants` are reactants, the next `nProducts` are products and any
 * remaining ones are agents. The molecules in `mols` are modified in place
 * (scaled, shifted, tagged with `rxnFragmentType`).
 *
 * NOTE(review): callers in the V2000/V3000 readers pass a fourth `nAgents`
 * argument, which this function does not declare or use.
 *
 * @param {Struct[]} mols - reaction components in file order
 * @param {number} nReactants - number of leading reactant molecules
 * @param {number} nProducts - number of product molecules following them
 * @returns {Struct} the merged structure with `isReaction` set to true
 */
function rxnMerge(mols, nReactants, nProducts) /* Struct */ { // eslint-disable-line max-statements
/* reader */
var ret = new Struct();
// bounding boxes and molecules per role, kept in parallel arrays
var bbReact = [],
bbAgent = [],
bbProd = [];
var molReact = [],
molAgent = [],
molProd = [];
var j;
// accumulate bond count and total bond length over all molecules
var bondLengthData = { cnt: 0, totalLength: 0 };
for (j = 0; j < mols.length; ++j) {
var mol = mols[j];
var bondLengthDataMol = mol.getBondLengthData();
bondLengthData.cnt += bondLengthDataMol.cnt;
bondLengthData.totalLength += bondLengthDataMol.totalLength;
}
if (SHOULD_RESCALE_MOLECULES) {
// despite its name this is the scale factor 1 / (average bond length);
// it is 1 when there are no bonds at all
var avgBondLength = 1 / (bondLengthData.cnt == 0 ? 1 : bondLengthData.totalLength / bondLengthData.cnt);
for (j = 0; j < mols.length; ++j) {
mol = mols[j];
mol.scale(avgBondLength);
}
}
// classify every molecule and tag its atoms with the fragment type
for (j = 0; j < mols.length; ++j) {
mol = mols[j];
var bb = mol.getCoordBoundingBoxObj();
// molecules without a bounding box are skipped: they are neither
// classified nor merged into the result
if (!bb)
continue; // eslint-disable-line no-continue
var fragmentType = (j < nReactants ? FRAGMENT.REACTANT : // eslint-disable-line no-nested-ternary
(j < nReactants + nProducts ? FRAGMENT.PRODUCT :
FRAGMENT.AGENT));
if (fragmentType == FRAGMENT.REACTANT) {
bbReact.push(bb);
molReact.push(mol);
} else if (fragmentType == FRAGMENT.AGENT) {
bbAgent.push(bb);
molAgent.push(mol);
} else if (fragmentType == FRAGMENT.PRODUCT) {
bbProd.push(bb);
molProd.push(mol);
}
mol.atoms.each(function (aid, atom) {
atom.rxnFragmentType = fragmentType;
});
}
// Move `mol` so that the left edge of its box sits at `xorig`; vertically
// the box is centred on y = 0, or its min.y is put at y = 1 when `over` is
// set. Atoms, S-group positions and `bb` itself are shifted, the molecule is
// merged into `ret`, and the width of the box is returned.
function shiftMol(ret, mol, bb, xorig, over) { // eslint-disable-line max-params
var d = new Vec2(xorig - bb.min.x, over ? 1 - bb.min.y : -(bb.min.y + bb.max.y) / 2);
mol.atoms.each(function (aid, atom) {
atom.pp.add_(d); // eslint-disable-line no-underscore-dangle
});
mol.sgroups.each(function (id, item) {
if (item.pp)
item.pp.add_(d); // eslint-disable-line no-underscore-dangle
});
bb.min.add_(d); // eslint-disable-line no-underscore-dangle
bb.max.add_(d); // eslint-disable-line no-underscore-dangle
mol.mergeInto(ret);
return bb.max.x - bb.min.x;
}
if (SHOULD_REACTION_FRAGMENT_RELAYOUT) {
// reaction fragment layout
// reactants, then agents (placed with `over`), then products; a gap of 2.0
// follows every molecule and another 2.0 separates the groups
var xorig = 0;
for (j = 0; j < molReact.length; ++j)
xorig += shiftMol(ret, molReact[j], bbReact[j], xorig, false) + 2.0;
xorig += 2.0;
for (j = 0; j < molAgent.length; ++j)
xorig += shiftMol(ret, molAgent[j], bbAgent[j], xorig, true) + 2.0;
xorig += 2.0;
for (j = 0; j < molProd.length; ++j)
xorig += shiftMol(ret, molProd[j], bbProd[j], xorig, false) + 2.0;
} else {
// keep the coordinates as they are and only merge
for (j = 0; j < molReact.length; ++j)
molReact[j].mergeInto(ret);
for (j = 0; j < molAgent.length; ++j)
molAgent[j].mergeInto(ret);
for (j = 0; j < molProd.length; ++j)
molProd[j].mergeInto(ret);
}
var bb1;
var bb2;
var x;
var y;
// overall boxes of all reactants / all products; stay null when the group is empty
var bbReactAll = null;
var bbProdAll = null;
// one plus sign between every pair of neighbouring reactants
for (j = 0; j < bbReact.length - 1; ++j) {
bb1 = bbReact[j];
bb2 = bbReact[j + 1];
x = (bb1.max.x + bb2.min.x) / 2;
y = (bb1.max.y + bb1.min.y + bb2.max.y + bb2.min.y) / 4;
ret.rxnPluses.add(new Struct.RxnPlus({ pp: new Vec2(x, y) }));
}
for (j = 0; j < bbReact.length; ++j) {
if (j == 0) {
bbReactAll = {};
bbReactAll.max = new Vec2(bbReact[j].max);
bbReactAll.min = new Vec2(bbReact[j].min);
} else {
bbReactAll.max = Vec2.max(bbReactAll.max, bbReact[j].max);
bbReactAll.min = Vec2.min(bbReactAll.min, bbReact[j].min);
}
}
// same for the products
for (j = 0; j < bbProd.length - 1; ++j) {
bb1 = bbProd[j];
bb2 = bbProd[j + 1];
x = (bb1.max.x + bb2.min.x) / 2;
y = (bb1.max.y + bb1.min.y + bb2.max.y + bb2.min.y) / 4;
ret.rxnPluses.add(new Struct.RxnPlus({ pp: new Vec2(x, y) }));
}
for (j = 0; j < bbProd.length; ++j) {
if (j == 0) {
bbProdAll = {};
bbProdAll.max = new Vec2(bbProd[j].max);
bbProdAll.min = new Vec2(bbProd[j].min);
} else {
bbProdAll.max = Vec2.max(bbProdAll.max, bbProd[j].max);
bbProdAll.min = Vec2.min(bbProdAll.min, bbProd[j].min);
}
}
// place the arrow
bb1 = bbReactAll;
bb2 = bbProdAll;
if (!bb1 && !bb2) {
// neither reactants nor products: arrow at the origin
ret.rxnArrows.add(new Struct.RxnArrow({ pp: new Vec2(0, 0) }));
} else {
// v1: middle of the right edge of the reactants box,
// v2: middle of the left edge of the products box
var v1 = bb1 ? new Vec2(bb1.max.x, (bb1.max.y + bb1.min.y) / 2) : null;
var v2 = bb2 ? new Vec2(bb2.min.x, (bb2.max.y + bb2.min.y) / 2) : null;
// when one side is missing, assume it lies `defaultOffset` away from the other
var defaultOffset = 3;
if (!v1)
v1 = new Vec2(v2.x - defaultOffset, v2.y);
if (!v2)
v2 = new Vec2(v1.x + defaultOffset, v1.y);
// presumably lc2 is the linear combination 0.5 * v1 + 0.5 * v2, i.e. the midpoint - confirm in Vec2
ret.rxnArrows.add(new Struct.RxnArrow({ pp: Vec2.lc2(v1, 0.5, v2, 0.5) }));
}
ret.isReaction = true;
return ret;
}
// Public API of this module: the format tables, number/line helpers and the
// reaction merger used by the V2000 and V3000 readers.
module.exports = {
fmtInfo: fmtInfo,
paddedNum: paddedNum,
parseDecimalInt: parseDecimalInt,
partitionLine: partitionLine,
partitionLineFixed: partitionLineFixed,
rxnMerge: rxnMerge
};

View File

@ -0,0 +1,426 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Vec2 = require('../../util/vec2');
var Map = require('../../util/map');
var element = require('./../element');
var Struct = require('./../struct/index');
var sGroup = require('./parseSGroup');
var utils = require('./utils');
// Gates the parsing of R-group fragment CTABs in parseRg2000: when false only
// the core structure is parsed and merged.
var loadRGroupFragments = true; // TODO: set to load the fragments
/**
 * Parse one line of the V2000 atom block into a Struct.Atom.
 *
 * The line is cut into the columns of utils.fmtInfo.atomLinePartition. The
 * y coordinate is negated; the charge and valence codes are translated
 * through utils.fmtInfo.chargeMap / valenceMap, the other numeric columns are
 * read with utils.parseDecimalInt (so blank columns become 0).
 *
 * @param {string} atomLine - atom block line
 * @returns {Struct.Atom} the atom built from the parsed columns
 */
function parseAtomLine(atomLine) {
	/* reader */
	var atomSplit = utils.partitionLine(atomLine, utils.fmtInfo.atomLinePartition);
	var params =
		{
			// generic
			pp: new Vec2(parseFloat(atomSplit[0]), -parseFloat(atomSplit[1]), parseFloat(atomSplit[2])),
			label: atomSplit[4].trim(),
			explicitValence: utils.fmtInfo.valenceMap[utils.parseDecimalInt(atomSplit[10])],

			// obsolete
			massDifference: utils.parseDecimalInt(atomSplit[5]),
			charge: utils.fmtInfo.chargeMap[utils.parseDecimalInt(atomSplit[6])],

			// query
			// a single conversion is enough: parseDecimalInt already returns an integer
			hCount: utils.parseDecimalInt(atomSplit[8]),
			stereoCare: utils.parseDecimalInt(atomSplit[9]) != 0,

			// reaction
			aam: utils.parseDecimalInt(atomSplit[14]),
			invRet: utils.parseDecimalInt(atomSplit[15]),

			// reaction query
			exactChangeFlag: utils.parseDecimalInt(atomSplit[16]) != 0
		};
	return new Struct.Atom(params);
}
/**
 * Parse one line of the V2000 bond block into a Struct.Bond.
 *
 * Atom numbers are converted to zero-based indices; type, stereo and
 * topology codes are translated through the tables in utils.fmtInfo.
 *
 * @param {string} bondLine - bond block line
 * @returns {Struct.Bond} the bond built from the parsed columns
 */
function parseBondLine(bondLine) {
	/* reader */
	var fmt = utils.fmtInfo;
	var cols = utils.partitionLine(bondLine, fmt.bondLinePartition);
	var beginAid = utils.parseDecimalInt(cols[0]) - 1;
	var endAid = utils.parseDecimalInt(cols[1]) - 1;
	var bondType = fmt.bondTypeMap[utils.parseDecimalInt(cols[2])];
	var bondStereo = fmt.bondStereoMap[utils.parseDecimalInt(cols[3])];
	var bondTopology = fmt.bondTopologyMap[utils.parseDecimalInt(cols[5])];
	var rxnCenter = utils.parseDecimalInt(cols[6]);
	return new Struct.Bond({
		begin: beginAid,
		end: endAid,
		type: bondType,
		stereo: bondStereo,
		xxx: cols[4], // column kept as raw text
		topology: bondTopology,
		reactingCenterStatus: rxnCenter
	});
}
/**
 * Parse one line of the V2000 atom list block.
 *
 * The header columns give the one-based atom number, the 'T'/other flag
 * that marks a NOT-list and the number of entries; the entries follow the
 * header in cells of utils.fmtInfo.atomListHeaderItemLength characters, of
 * which all but the last character are parsed.
 *
 * @param {string} atomListLine - atom list block line
 * @returns {{aid: number, atomList: Struct.AtomList}} zero-based atom index
 *   and the list to attach to that atom
 */
function parseAtomListLine(/* string */atomListLine) {
	/* reader */
	var header = utils.partitionLine(atomListLine, utils.fmtInfo.atomListHeaderPartition);
	var aid = utils.parseDecimalInt(header[0]) - 1;
	var isNotList = (header[2].trim() == 'T');
	var count = utils.parseDecimalInt(header[4].trim());
	var tail = atomListLine.slice(utils.fmtInfo.atomListHeaderLength);
	var step = utils.fmtInfo.atomListHeaderItemLength;
	var ids = [];
	for (var k = 0, start = 0; k < count; ++k, start += step)
		ids.push(utils.parseDecimalInt(tail.slice(start, start + step - 1)));
	var atomList = new Struct.AtomList({
		notList: isNotList,
		ids: ids
	});
	return { aid: aid, atomList: atomList };
}
/**
 * Read the property block of a V2000 CTAB.
 *
 * Lines starting with 'A' (alias / pseudo atom) and 'M' (typed property) are
 * handled; any other line is skipped. Atom-level properties are collected in
 * the returned map, S-group records create or update entries of `sGroups`
 * and 'LOG' records fill `rLogic`, both in place. Reading stops at `end` or
 * at an 'M' line of type 'END', whichever comes first.
 *
 * @param ctab - not referenced in the body of this function
 * @param {string[]} ctabLines - lines of the CTAB
 * @param {number} shift - index of the first property line
 * @param {number} end - index at which reading stops (exclusive)
 * @param {Object} sGroups - S-groups, updated in place
 * @param {Object} rLogic - R-group logic keyed by R-group number, filled in place
 * @returns {Map} property name -> Map of (zero-based atom index -> value)
 */
function parsePropertyLines(ctab, ctabLines, shift, end, sGroups, rLogic) { // eslint-disable-line max-statements, max-params
/* reader */
var props = new Map();
while (shift < end) {
var line = ctabLines[shift];
if (line.charAt(0) == 'A') {
// the value of an 'A' record is on the following line, which is consumed here
var propValue = ctabLines[++shift];
// a value containing a single-quoted part is stored as 'pseudo' (quotes
// removed), anything else as 'alias'
var isPseudo = /'.+'/.test(propValue);
if (isPseudo && !props.get('pseudo'))
props.set('pseudo', new Map());
if (!isPseudo && !props.get('alias'))
props.set('alias', new Map());
if (isPseudo) propValue = propValue.replace(/'/g, '');
props.get(isPseudo ? 'pseudo' : 'alias').set(utils.parseDecimalInt(line.slice(3, 6)) - 1, propValue);
} else if (line.charAt(0) == 'M') {
// characters 3..5 hold the property type, the rest is its payload
var type = line.slice(3, 6);
var propertyData = line.slice(6);
if (type == 'END') {
break;
} else if (type == 'CHG') {
if (!props.get('charge'))
props.set('charge', new Map());
props.get('charge').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'RAD') {
if (!props.get('radical'))
props.set('radical', new Map());
props.get('radical').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'ISO') {
if (!props.get('isotope'))
props.set('isotope', new Map());
props.get('isotope').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'RBC') {
if (!props.get('ringBondCount'))
props.set('ringBondCount', new Map());
props.get('ringBondCount').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'SUB') {
if (!props.get('substitutionCount'))
props.set('substitutionCount', new Map());
props.get('substitutionCount').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'UNS') {
if (!props.get('unsaturatedAtom'))
props.set('unsaturatedAtom', new Map());
props.get('unsaturatedAtom').update(sGroup.readKeyValuePairs(propertyData));
// else if (type == "LIN") // link atom
} else if (type == 'RGP') { // rgroup atom
if (!props.get('rglabel'))
props.set('rglabel', new Map());
var rglabels = props.get('rglabel');
var a2rs = sGroup.readKeyMultiValuePairs(propertyData);
// rglabel is a bit mask: R-group number r sets bit (r - 1)
for (var a2ri = 0; a2ri < a2rs.length; a2ri++) {
var a2r = a2rs[a2ri];
rglabels.set(a2r[0], (rglabels.get(a2r[0]) || 0) | (1 << (a2r[1] - 1)));
}
} else if (type == 'LOG') { // rgroup logic
// after the first four characters: R-group number, if-then number,
// rest-H flag and the occurrence range, read from fixed columns
propertyData = propertyData.slice(4);
var rgid = utils.parseDecimalInt(propertyData.slice(0, 3).trim());
var iii = utils.parseDecimalInt(propertyData.slice(4, 7).trim());
var hhh = utils.parseDecimalInt(propertyData.slice(8, 11).trim());
var ooo = propertyData.slice(12).trim();
var logic = {};
if (iii > 0)
logic.ifthen = iii;
logic.resth = hhh == 1;
logic.range = ooo;
rLogic[rgid] = logic;
} else if (type == 'APO') {
if (!props.get('attpnt'))
props.set('attpnt', new Map());
props.get('attpnt').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'ALS') { // atom list
if (!props.get('atomList'))
props.set('atomList', new Map());
// header columns first, then the element labels in cells of 4 characters
var list = parsePropertyLineAtomList(
utils.partitionLine(propertyData, [1, 3, 3, 1, 1, 1]),
utils.partitionLineFixed(propertyData.slice(10), 4, false));
props.get('atomList').update(
list);
if (!props.get('label'))
props.set('label', new Map());
// atoms that carry a list get the label 'L#'
for (var aid in list) props.get('label').set(aid, 'L#');
} else if (type == 'STY') { // introduce s-group
sGroup.initSGroup(sGroups, propertyData);
} else if (type == 'SST') {
sGroup.applySGroupProp(sGroups, 'subtype', propertyData);
} else if (type == 'SLB') {
sGroup.applySGroupProp(sGroups, 'label', propertyData, true);
} else if (type == 'SPL') {
sGroup.applySGroupProp(sGroups, 'parent', propertyData, true, true);
} else if (type == 'SCN') {
sGroup.applySGroupProp(sGroups, 'connectivity', propertyData);
} else if (type == 'SAL') {
sGroup.applySGroupArrayProp(sGroups, 'atoms', propertyData, -1);
} else if (type == 'SBL') {
sGroup.applySGroupArrayProp(sGroups, 'bonds', propertyData, -1);
} else if (type == 'SPA') {
sGroup.applySGroupArrayProp(sGroups, 'patoms', propertyData, -1);
} else if (type == 'SMT') {
// subscript text of the S-group whose one-based number is in the first four characters
var sid = utils.parseDecimalInt(propertyData.slice(0, 4)) - 1;
sGroups[sid].data.subscript = propertyData.slice(4).trim();
} else if (type == 'SDT') {
sGroup.applyDataSGroupDesc(sGroups, propertyData);
} else if (type == 'SDD') {
sGroup.applyDataSGroupInfoLine(sGroups, propertyData);
} else if (type == 'SCD') {
// SCD appends a data chunk, SED appends the last chunk and finalizes the value
sGroup.applyDataSGroupDataLine(sGroups, propertyData, false);
} else if (type == 'SED') {
sGroup.applyDataSGroupDataLine(sGroups, propertyData, true);
}
}
++shift;
}
return props;
}
/**
 * Copy one property onto atoms: for every (atom id, value) pair of `values`
 * the value is stored under `propId` on the atom with that id.
 *
 * @param atoms - pool of atoms, accessed through `get(aid)`
 * @param values - map of atom id to value, iterated through `each`
 * @param {string} propId - name of the atom property to set
 */
function applyAtomProp(atoms /* Pool */, values /* Map */, propId /* string */) {
	/* reader */
	function assign(aid, propVal) {
		var atom = atoms.get(aid);
		atom[propId] = propVal;
	}
	values.each(assign);
}
/**
 * Parse the body of a V2000 CTAB (everything after the counts line).
 *
 * @param {string[]} ctabLines - CTAB lines without the counts line
 * @param {string[]} countsSplit - columns of the counts line
 * @returns {Struct} structure with atoms, bonds, atom lists, atom
 *   properties, S-groups and R-group logic loaded
 */
function parseCTabV2000(ctabLines, countsSplit) { // eslint-disable-line max-statements
/* reader */
var ctab = new Struct();
var i;
// counts line columns used here: 0 atoms, 1 bonds, 2 atom lists,
// 4 chiral flag, 5 stext lines, 10 property lines
var atomCount = utils.parseDecimalInt(countsSplit[0]);
var bondCount = utils.parseDecimalInt(countsSplit[1]);
var atomListCount = utils.parseDecimalInt(countsSplit[2]);
ctab.isChiral = utils.parseDecimalInt(countsSplit[4]) != 0;
var stextLinesCount = utils.parseDecimalInt(countsSplit[5]);
var propertyLinesCount = utils.parseDecimalInt(countsSplit[10]);
// the blocks follow each other in the order atoms, bonds, atom lists, stext
var shift = 0;
var atomLines = ctabLines.slice(shift, shift + atomCount);
shift += atomCount;
var bondLines = ctabLines.slice(shift, shift + bondCount);
shift += bondCount;
var atomListLines = ctabLines.slice(shift, shift + atomListCount);
// stext lines are skipped without being parsed
shift += atomListCount + stextLinesCount;
var atoms = atomLines.map(parseAtomLine);
for (i = 0; i < atoms.length; ++i)
ctab.atoms.add(atoms[i]);
var bonds = bondLines.map(parseBondLine);
for (i = 0; i < bonds.length; ++i)
ctab.bonds.add(bonds[i]);
var atomLists = atomListLines.map(parseAtomListLine);
// an atom with an atom list is relabelled 'L#'
atomLists.forEach(function (pair) {
ctab.atoms.get(pair.aid).atomList = pair.atomList;
ctab.atoms.get(pair.aid).label = 'L#';
});
var sGroups = {};
var rLogic = {};
var props = parsePropertyLines(ctab, ctabLines, shift,
Math.min(ctabLines.length, shift + propertyLinesCount), sGroups, rLogic);
props.each(function (propId, values) {
applyAtomProp(ctab.atoms, values, propId);
});
var atomMap = {};
var sid;
// a 'DAT' S-group without atoms of its own takes a copy of the atoms of its
// parent when that parent is of type 'GEN'
for (sid in sGroups) {
var sg = sGroups[sid];
if (sg.type === 'DAT' && sg.atoms.length === 0) {
var parent = sGroups[sid].parent;
if (parent >= 0) {
// NOTE(review): sGroups[parent - 1] is undefined when parent is 0 or names
// an S-group that was not declared, and psg.type then throws - confirm
// which values applySGroupProp can store in `parent`
var psg = sGroups[parent - 1];
if (psg.type === 'GEN')
sg.atoms = [].slice.call(psg.atoms);
}
}
}
for (sid in sGroups)
sGroup.loadSGroup(ctab, sGroups[sid], atomMap);
// S-groups left without atoms after filtering (and without allAtoms) are removed
var emptyGroups = [];
for (sid in sGroups) { // TODO: why do we need that?
Struct.SGroup.filter(ctab, sGroups[sid], atomMap);
if (sGroups[sid].atoms.length == 0 && !sGroups[sid].allAtoms)
emptyGroups.push(sid);
}
for (i = 0; i < emptyGroups.length; ++i) {
ctab.sGroupForest.remove(emptyGroups[i]);
ctab.sgroups.remove(emptyGroups[i]);
}
for (var rgid in rLogic)
ctab.rgroups.set(rgid, new Struct.RGroup(rLogic[rgid]));
return ctab;
}
/**
 * Parse a V2000 RGFile: a core CTAB followed by '$RGP' sections that each
 * hold the fragment CTABs of one R-group, terminated by '$END MOL'.
 *
 * @param {string[]} ctabLines - all lines of the RGFile
 * @returns {Struct} core structure merged with the R-group fragments
 * @throws {Error} 'RGFile format invalid' when an expected '$...' marker is
 *   missing, 'Unexpected end of file' when the lines run out inside a section
 */
function parseRg2000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
// the first seven lines are skipped; the core '$CTAB' must follow
ctabLines = ctabLines.slice(7);
if (ctabLines[0].trim() != '$CTAB')
throw new Error('RGFile format invalid');
var i = 1;
// advance to the next line starting with '$'
// NOTE(review): no bounds check - if no such line exists ctabLines[i] is
// undefined and charAt throws a TypeError
while (ctabLines[i].charAt(0) != '$') i++;
if (ctabLines[i].trim() != '$END CTAB')
throw new Error('RGFile format invalid');
var coreLines = ctabLines.slice(1, i);
ctabLines = ctabLines.slice(i + 1);
// R-group number -> list of CTABs (each an array of lines)
var fragmentLines = {};
while (true) { // eslint-disable-line no-constant-condition
if (ctabLines.length == 0)
throw new Error('Unexpected end of file');
var line = ctabLines[0].trim();
if (line == '$END MOL') {
ctabLines = ctabLines.slice(1);
break;
}
if (line != '$RGP')
throw new Error('RGFile format invalid');
// the line after '$RGP' holds the R-group number
var rgid = ctabLines[1].trim() - 0;
fragmentLines[rgid] = [];
ctabLines = ctabLines.slice(2);
// collect every '$CTAB' ... '$END CTAB' of this R-group until '$END RGP'
while (true) { // eslint-disable-line no-constant-condition
if (ctabLines.length == 0)
throw new Error('Unexpected end of file');
line = ctabLines[0].trim();
if (line == '$END RGP') {
ctabLines = ctabLines.slice(1);
break;
}
if (line != '$CTAB')
throw new Error('RGFile format invalid');
i = 1;
// NOTE(review): same unbounded scan as for the core CTAB above
while (ctabLines[i].charAt(0) != '$') i++;
if (ctabLines[i].trim() != '$END CTAB')
throw new Error('RGFile format invalid');
fragmentLines[rgid].push(ctabLines.slice(1, i));
ctabLines = ctabLines.slice(i + 1);
}
}
var core = parseCTab(coreLines);
var frag = {};
// fragments are only parsed when loadRGroupFragments is set; otherwise the
// core is merged with an empty fragment table
if (loadRGroupFragments) {
for (var id in fragmentLines) {
frag[id] = [];
for (var j = 0; j < fragmentLines[id].length; ++j)
frag[id].push(parseCTab(fragmentLines[id][j]));
}
}
return rgMerge(core, frag);
}
/**
 * Parse a V2000 reaction file.
 *
 * The first four lines are skipped, the next one holds the reactant,
 * product and agent counts. Every following '$MOL' section is parsed either
 * as an RGFile (first line starts with '$MDL') or as a plain molfile whose
 * first line is the name and whose CTAB starts on the fourth line. The
 * molecules are then combined by utils.rxnMerge.
 *
 * @param {string[]} ctabLines - all lines of the reaction file
 * @returns {Struct} the merged reaction
 */
function parseRxn2000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
	/* reader */
	var remaining = ctabLines.slice(4);
	var counts = utils.partitionLine(remaining[0], utils.fmtInfo.rxnItemsPartition);
	var nReactants = counts[0] - 0;
	var nProducts = counts[1] - 0;
	var nAgents = counts[2] - 0;
	remaining = remaining.slice(1); // consume counts line

	var mols = [];
	while (remaining.length > 0 && remaining[0].substr(0, 4) == '$MOL') {
		remaining = remaining.slice(1);
		// the section runs up to the next '$MOL' marker or the end of input
		var sectionEnd = 0;
		while (sectionEnd < remaining.length && remaining[sectionEnd].substr(0, 4) != '$MOL')
			sectionEnd++;
		var molLines = remaining.slice(0, sectionEnd);
		var isRGroupFile = molLines[0].search('\\$MDL') == 0;
		var struct;
		if (isRGroupFile) {
			struct = parseRg2000(molLines);
		} else {
			struct = parseCTab(molLines.slice(3));
			struct.name = molLines[0].trim();
		}
		mols.push(struct);
		remaining = remaining.slice(sectionEnd);
	}
	return utils.rxnMerge(mols, nReactants, nProducts, nAgents);
}
/**
 * Parse a V2000 CTAB that starts with its counts line.
 *
 * @param {string[]} ctabLines - counts line followed by the CTAB body
 * @returns {Struct} result of parseCTabV2000 for the body and the counts columns
 */
function parseCTab(/* string */ ctabLines) /* Struct */ {
	/* reader */
	var countsLine = ctabLines[0];
	var countsSplit = utils.partitionLine(countsLine, utils.fmtInfo.countsLinePartition);
	var bodyLines = ctabLines.slice(1);
	return parseCTabV2000(bodyLines, countsSplit);
}
/**
 * Combine a core structure with its R-group fragments.
 *
 * The scaffold is merged into a fresh Struct first. Every fragment then gets
 * an RGroup registered under its R-group id, a new fragment entry referenced
 * by that RGroup, and all of its atoms assigned to that fragment, before it
 * is merged into the result as well.
 *
 * @param {Struct} scaffold - the core structure
 * @param {Object} rgroups - R-group id -> array of fragment structures
 * @returns {Struct} the merged structure
 */
function rgMerge(scaffold, rgroups) /* Struct */ {
	/* reader */
	var merged = new Struct();
	scaffold.mergeInto(merged, null, null, false, true);

	function mergeFragment(fragStruct, rgid) {
		fragStruct.rgroups.set(rgid, new Struct.RGroup());
		var frid = fragStruct.frags.add({});
		fragStruct.rgroups.get(rgid).frags.add(frid);
		fragStruct.atoms.each(function (aid, atom) {
			atom.fragment = frid;
		});
		fragStruct.mergeInto(merged);
	}

	for (var rgid in rgroups) {
		for (var k = 0; k < rgroups[rgid].length; ++k)
			mergeFragment(rgroups[rgid][k], rgid);
	}
	return merged;
}
/**
 * Translate element labels into ids by looking each trimmed label up in
 * `element.map`.
 *
 * @param {string[]} labels - element labels, possibly padded with whitespace
 * @returns {Array} one lookup result per label (undefined for unknown labels)
 */
function labelsListToIds(labels) {
	/* reader */
	var ids = [];
	var idx = 0;
	while (idx < labels.length) {
		var symbol = labels[idx].trim();
		ids.push(element.map[symbol]);
		idx += 1;
	}
	return ids;
}
/**
 * Build the atom list described by an atom list property line.
 *
 * @param {string[]} hdr - header columns: [1] one-based atom number,
 *   [2] number of entries, [4] 'T' for a NOT-list
 * @param {string[]} lst - element labels; only the first `count` are used
 * @returns {Object} object with a single property: zero-based atom index ->
 *   Struct.AtomList
 */
function parsePropertyLineAtomList(hdr, lst) {
	/* reader */
	var aid = utils.parseDecimalInt(hdr[1]) - 1;
	var count = utils.parseDecimalInt(hdr[2]);
	var atomList = new Struct.AtomList({
		notList: hdr[4].trim() == 'T',
		ids: labelsListToIds(lst.slice(0, count))
	});
	var byAtom = {};
	byAtom[aid] = atomList;
	return byAtom;
}
// Public API of the V2000 reader: plain CTAB bodies, RGFiles and reaction files.
module.exports = {
parseCTabV2000: parseCTabV2000,
parseRg2000: parseRg2000,
parseRxn2000: parseRxn2000
};

View File

@ -0,0 +1,482 @@
/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Vec2 = require('../../util/vec2');
var element = require('./../element');
var Struct = require('./../struct/index');
var sGroup = require('./parseSGroup');
var utils = require('./utils');
/**
 * Parse one line of a V3000 atom block (with the 'M V30' prefix already
 * removed) into a Struct.Atom.
 *
 * Tokens 1..5 are the label, x, y, z and atom-atom mapping value; the
 * remaining tokens are KEY=VALUE properties.
 *
 * @param {string} line - atom line
 * @returns {Struct.Atom} the atom built from the parsed tokens
 * @throws {Error} when a label ends with ']' but does not start with '[' or 'NOT ['
 */
function parseAtomLineV3000(line) { // eslint-disable-line max-statements
/* reader */
var split, subsplit, key, value, i;
split = spaceparsplit(line);
// the y coordinate is negated; aam is kept as trimmed text, not converted to a number
var params = {
pp: new Vec2(parseFloat(split[2]), -parseFloat(split[3]), parseFloat(split[4])),
aam: split[5].trim()
};
var label = split[1].trim();
if (label.charAt(0) == '"' && label.charAt(label.length - 1) == '"')
label = label.substr(1, label.length - 2); // strip quotation marks
if (label.charAt(label.length - 1) == ']') { // assume atom list
label = label.substr(0, label.length - 1); // remove ']'
var atomListParams = {};
atomListParams.notList = false;
if (label.substr(0, 5) == 'NOT [') {
atomListParams.notList = true;
label = label.substr(5); // remove 'NOT ['
} else if (label.charAt(0) != '[') {
throw new Error('Error: atom list expected, found \'' + label + '\'');
} else {
label = label.substr(1); // remove '['
}
// the list entries are comma separated; the atom itself is labelled 'L#'
atomListParams.ids = labelsListToIds(label.split(','));
params['atomList'] = new Struct.AtomList(atomListParams);
params['label'] = 'L#';
} else {
params['label'] = label;
}
// drop the six leading tokens, leaving only the KEY=VALUE properties
split.splice(0, 6);
for (i = 0; i < split.length; ++i) {
subsplit = splitonce(split[i], '=');
key = subsplit[0];
value = subsplit[1];
if (key in utils.fmtInfo.v30atomPropMap) {
var ival = utils.parseDecimalInt(value);
if (key == 'VAL') {
// VAL=0 leaves explicitValence unset, VAL=-1 is stored as valence 0
if (ival == 0)
continue; // eslint-disable-line no-continue
if (ival == -1)
ival = 0;
}
params[utils.fmtInfo.v30atomPropMap[key]] = ival;
} else if (key == 'RGROUPS') {
// drop the first and last character of the value, then skip the first
// space-separated entry; every remaining number r sets bit (r - 1)
// NOTE(review): the length passed to substr is taken from the untrimmed value
value = value.trim().substr(1, value.length - 2);
var rgrsplit = value.split(' ').slice(1);
params.rglabel = 0;
for (var j = 0; j < rgrsplit.length; ++j)
params.rglabel |= 1 << (rgrsplit[j] - 1);
} else if (key == 'ATTCHPT') {
params.attpnt = value.trim() - 0;
}
}
return new Struct.Atom(params);
}
/**
 * Parse one line of a V3000 bond block (with the 'M V30' prefix already
 * removed) into a Struct.Bond.
 *
 * Tokens 1..3 are the bond type and the one-based begin/end atom numbers;
 * the remaining tokens are KEY=VALUE properties, of which CFG, TOPO, RXCTR
 * and STBOX are understood. A CFG of "either" on a double bond is stored as
 * cis/trans.
 *
 * @param {string} line - bond line
 * @returns {Struct.Bond} the bond built from the parsed tokens
 */
function parseBondLineV3000(line) {
	/* reader */
	var tokens = spaceparsplit(line);
	var beginAid = utils.parseDecimalInt(tokens[2]) - 1;
	var endAid = utils.parseDecimalInt(tokens[3]) - 1;
	var bondType = utils.fmtInfo.bondTypeMap[utils.parseDecimalInt(tokens[1])];
	var params = {
		begin: beginAid,
		end: endAid,
		type: bondType
	};
	// only the KEY=VALUE tokens after the four leading ones remain
	tokens.splice(0, 4);
	for (var k = 0; k < tokens.length; ++k) {
		var pair = splitonce(tokens[k], '=');
		var key = pair[0];
		var value = pair[1];
		if (key == 'CFG') {
			params.stereo = utils.fmtInfo.v30bondStereoMap[utils.parseDecimalInt(value)];
			var isDouble = params.type == Struct.Bond.PATTERN.TYPE.DOUBLE;
			if (isDouble && params.stereo == Struct.Bond.PATTERN.STEREO.EITHER)
				params.stereo = Struct.Bond.PATTERN.STEREO.CIS_TRANS;
		} else if (key == 'TOPO') {
			params.topology = utils.fmtInfo.bondTopologyMap[utils.parseDecimalInt(value)];
		} else if (key == 'RXCTR') {
			params.reactingCenterStatus = utils.parseDecimalInt(value);
		} else if (key == 'STBOX') {
			params.stereoCare = utils.parseDecimalInt(value);
		}
	}
	return new Struct.Bond(params);
}
/**
 * Skip a COLLECTION block of a V3000 CTAB; its content is not interpreted.
 *
 * @param ctab - not referenced in the body of this function
 * @param {string[]} ctabLines - lines of the CTAB
 * @param {number} shift - index of the line that opens the collection block
 * @returns {number} index of the line following 'M V30 END COLLECTION'
 * @throws {Error} when the lines run out before the end marker is found
 */
function v3000parseCollection(ctab, ctabLines, shift) {
	/* reader */
	shift++; // step over the line that opens the block
	// bounded scan: without the length check a missing end marker would
	// surface as a TypeError on `undefined.trim()`
	while (shift < ctabLines.length) {
		if (ctabLines[shift++].trim() == 'M V30 END COLLECTION')
			return shift;
	}
	throw new Error('Collection declaration incomplete.');
}
/**
 * Read the SGROUP block of a V3000 CTAB and load every S-group it declares.
 *
 * @param {Struct} ctab - structure the S-groups are loaded into
 * @param {string[]} ctabLines - lines of the CTAB
 * @param {Object} sgroups - S-groups keyed by their number, filled in place
 * @param {Object} atomMap - passed through to sGroup.loadSGroup
 * @param {number} shift - index of the line that opens the block
 * @returns {number} index of the line following 'END SGROUP'
 * @throws {Error} when a property is not of the form AAA=BBB, or when the
 *   lines run out before 'END SGROUP'
 */
function v3000parseSGroup(ctab, ctabLines, sgroups, atomMap, shift) { // eslint-disable-line max-params, max-statements
/* reader */
var line = '';
shift++;
while (shift < ctabLines.length) {
line = stripV30(ctabLines[shift++]).trim();
if (line.trim() == 'END SGROUP')
return shift;
// a trailing '-' marks a continuation: drop it and append the next line
while (line.charAt(line.length - 1) == '-')
line = (line.substr(0, line.length - 1) + stripV30(ctabLines[shift++])).trim();
// fields 0..2 are number, type and label; the rest are NAME=VALUE properties
var split = splitSGroupDef(line);
var type = split[1];
var sg = new Struct.SGroup(type);
sg.number = split[0] - 0;
sg.type = type;
sg.label = split[2] - 0;
sgroups[sg.number] = sg;
// property name -> list of values, since a name may occur more than once
var props = {};
for (var i = 3; i < split.length; ++i) {
var subsplit = splitonce(split[i], '=');
if (subsplit.length != 2)
throw new Error('A record of form AAA=BBB or AAA=(...) expected, got \'' + split[i] + '\'');
var name = subsplit[0];
if (!(name in props))
props[name] = [];
props[name].push(subsplit[1]);
}
// NOTE(review): ATOMS is read unconditionally - a definition without it
// throws a TypeError here, unlike PATOMS and BONDS below
sg.atoms = parseBracedNumberList(props['ATOMS'][0], -1);
if (props['PATOMS'])
sg.patoms = parseBracedNumberList(props['PATOMS'][0], -1);
sg.bonds = props['BONDS'] ? parseBracedNumberList(props['BONDS'][0], -1) : [];
var brkxyzStrs = props['BRKXYZ'];
sg.brkxyz = [];
if (brkxyzStrs) {
for (var j = 0; j < brkxyzStrs.length; ++j)
sg.brkxyz.push(parseBracedNumberList(brkxyzStrs[j]));
}
// MULT and LABEL both write data.subscript; when both are present LABEL wins
if (props['MULT'])
sg.data.subscript = props['MULT'][0] - 0;
if (props['LABEL'])
sg.data.subscript = props['LABEL'][0].trim();
if (props['CONNECT'])
sg.data.connectivity = props['CONNECT'][0].toLowerCase();
if (props['FIELDDISP'])
sGroup.applyDataSGroupInfo(sg, stripQuotes(props['FIELDDISP'][0]));
if (props['FIELDDATA'])
sGroup.applyDataSGroupData(sg, props['FIELDDATA'][0], true);
if (props['FIELDNAME'])
sGroup.applyDataSGroupName(sg, props['FIELDNAME'][0]);
if (props['QUERYTYPE'])
sGroup.applyDataSGroupQuery(sg, props['QUERYTYPE'][0]);
if (props['QUERYOP'])
sGroup.applyDataSGroupQueryOp(sg, props['QUERYOP'][0]);
sGroup.loadSGroup(ctab, sg, atomMap);
}
throw new Error('S-group declaration incomplete.');
}
/**
 * Parse a V3000 CTAB, starting at its 'M V30 BEGIN CTAB' line.
 *
 * @param {string[]} ctabLines - lines starting with the BEGIN CTAB line
 * @param {boolean} norgroups - when falsy, the lines after 'M V30 END CTAB'
 *   are handed to readRGroups3000
 * @returns {Struct} the parsed structure
 * @throws {Error} 'CTAB V3000 invalid' when an expected marker line is missing
 */
function parseCTabV3000(ctabLines, norgroups) { // eslint-disable-line max-statements
/* reader */
var ctab = new Struct();
var shift = 0;
if (ctabLines[shift++].trim() != 'M V30 BEGIN CTAB')
throw Error('CTAB V3000 invalid');
if (ctabLines[shift].slice(0, 13) != 'M V30 COUNTS')
throw Error('CTAB V3000 invalid');
// of the counts only the fifth value (the chiral flag) is used
var vals = ctabLines[shift].slice(14).split(' ');
ctab.isChiral = (utils.parseDecimalInt(vals[4]) == 1);
shift++;
// the bond, collection and S-group blocks are only read when an atom block is present
if (ctabLines[shift].trim() == 'M V30 BEGIN ATOM') {
shift++;
var line;
while (shift < ctabLines.length) {
line = stripV30(ctabLines[shift++]).trim();
if (line == 'END ATOM')
break;
// a trailing '-' marks a continuation: drop it and append the next line
while (line.charAt(line.length - 1) == '-')
line = (line.substring(0, line.length - 1) + stripV30(ctabLines[shift++])).trim();
ctab.atoms.add(parseAtomLineV3000(line));
}
if (ctabLines[shift].trim() == 'M V30 BEGIN BOND') {
shift++;
while (shift < ctabLines.length) {
line = stripV30(ctabLines[shift++]).trim();
if (line == 'END BOND')
break;
while (line.charAt(line.length - 1) == '-')
line = (line.substring(0, line.length - 1) + stripV30(ctabLines[shift++])).trim();
ctab.bonds.add(parseBondLineV3000(line));
}
}
// TODO: let sections follow in arbitrary order
var sgroups = {};
var atomMap = {};
// NOTE(review): ctabLines[shift] is not bounds-checked - a CTAB without an
// END CTAB line ends in a TypeError rather than 'CTAB V3000 invalid'
while (ctabLines[shift].trim() != 'M V30 END CTAB') {
if (ctabLines[shift].trim() == 'M V30 BEGIN COLLECTION')
// TODO: read collection information
shift = v3000parseCollection(ctab, ctabLines, shift);
else if (ctabLines[shift].trim() == 'M V30 BEGIN SGROUP')
shift = v3000parseSGroup(ctab, ctabLines, sgroups, atomMap, shift);
else
throw Error('CTAB V3000 invalid');
}
}
if (ctabLines[shift++].trim() != 'M V30 END CTAB')
throw Error('CTAB V3000 invalid');
if (!norgroups)
readRGroups3000(ctab, ctabLines.slice(shift));
return ctab;
}
/**
 * Read consecutive 'M V30 BEGIN RGROUP' blocks and merge their fragments
 * into `ctab`. Reading stops at the first line that does not open an RGROUP
 * block. Nothing is returned; `ctab` is updated in place.
 *
 * @param {Struct} ctab - structure the fragments are merged into
 * @param {string[]} ctabLines - lines following the main CTAB
 * @throws {Error} 'CTAB V3000 invalid' when a block contains a line that is
 *   neither an RLOGIC record nor the start of a CTAB
 */
function readRGroups3000(ctab, /* string */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
/* reader */
// R-group id -> parsed fragment structures / logic record
var rfrags = {};
var rLogic = {};
var shift = 0;
while (shift < ctabLines.length && ctabLines[shift].search('M V30 BEGIN RGROUP') == 0) {
// the id is the last space-separated token of the BEGIN RGROUP line
var id = ctabLines[shift++].split(' ').pop();
rfrags[id] = [];
rLogic[id] = {};
// NOTE(review): ctabLines[shift] is not bounds-checked inside this loop - a
// block without 'M V30 END RGROUP' ends in a TypeError
while (true) { // eslint-disable-line no-constant-condition
var line = ctabLines[shift].trim();
if (line.search('M V30 RLOGIC') == 0) {
// RLOGIC values: if-then number, rest-H flag, then the occurrence range
line = line.slice(13);
var rlsplit = line.trim().split(/\s+/g);
var iii = utils.parseDecimalInt(rlsplit[0]);
var hhh = utils.parseDecimalInt(rlsplit[1]);
var ooo = rlsplit.slice(2).join(' ');
var logic = {};
if (iii > 0)
logic.ifthen = iii;
logic.resth = hhh == 1;
logic.range = ooo;
rLogic[id] = logic;
shift++;
continue; // eslint-disable-line no-continue
}
if (line != 'M V30 BEGIN CTAB')
throw Error('CTAB V3000 invalid');
// find the offset of the END CTAB line relative to `shift`
// NOTE(review): the bound is ctabLines.length although the index is
// shift + i, so a missing END CTAB reads past the end and throws a TypeError
for (var i = 0; i < ctabLines.length; ++i) {
if (ctabLines[shift + i].trim() == 'M V30 END CTAB')
break;
}
var lines = ctabLines.slice(shift, shift + i + 1);
// fragments are parsed with norgroups set, so they do not read R-groups themselves
var rfrag = parseCTabV3000(lines, true);
rfrags[id].push(rfrag);
shift = shift + i + 1;
if (ctabLines[shift].trim() == 'M V30 END RGROUP') {
shift++;
break;
}
}
}
// register every fragment under its R-group, assign its atoms to a new
// fragment entry and merge it into ctab
for (var rgid in rfrags) {
for (var j = 0; j < rfrags[rgid].length; ++j) {
var rg = rfrags[rgid][j];
rg.rgroups.set(rgid, new Struct.RGroup(rLogic[rgid]));
var frid = rg.frags.add({});
rg.rgroups.get(rgid).frags.add(frid);
rg.atoms.each(function (aid, atom) {
atom.fragment = frid;
});
rg.mergeInto(ctab);
}
}
}
/**
 * Parse a V3000 reaction file.
 *
 * The first four lines are skipped. The CTABs found inside the REACTANT and
 * PRODUCT blocks are parsed and combined by utils.rxnMerge (reactants first,
 * then products); RGROUP blocks are collected and read into the merged
 * structure afterwards.
 *
 * @param {string[]} ctabLines - all lines of the reaction file
 * @returns {Struct} the merged reaction
 * @throws {Error} 'line unrecognized: ...' for a line that matches none of
 *   the expected markers
 */
function parseRxn3000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
/* reader */
ctabLines = ctabLines.slice(4);
// counts line: the values after the first three whitespace-separated tokens
// are the reactant, product and (optional) agent counts
var countsSplit = ctabLines[0].split(/\s+/g).slice(3);
var nReactants = countsSplit[0] - 0,
nProducts = countsSplit[1] - 0,
nAgents = countsSplit.length > 2 ? countsSplit[2] - 0 : 0;
// Index of the first 'M V30 END CTAB' line at or after i.
// NOTE(review): when none is found this only logs and returns undefined; the
// caller then uses that value as a slice bound and loop index
function findCtabEnd(i) {
for (var j = i; j < ctabLines.length; ++j) {
if (ctabLines[j].trim() == 'M V30 END CTAB')
return j;
}
console.error('CTab format invalid');
}
// Index of the first 'M V30 END RGROUP' line at or after i; same caveat as above.
function findRGroupEnd(i) {
for (var j = i; j < ctabLines.length; ++j) {
if (ctabLines[j].trim() == 'M V30 END RGROUP')
return j;
}
console.error('CTab format invalid');
}
var molLinesReactants = [];
var molLinesProducts = [];
// list that CTABs are currently appended to; null outside REACTANT/PRODUCT blocks
var current = null;
var rGroups = [];
for (var i = 0; i < ctabLines.length; ++i) {
var line = ctabLines[i].trim();
var j;
if (line.startsWith('M V30 COUNTS')) {
// do nothing
} else if (line == 'M END') {
break; // stop reading
} else if (line == 'M V30 BEGIN PRODUCT') {
console.assert(current == null, 'CTab format invalid');
current = molLinesProducts;
} else if (line == 'M V30 END PRODUCT') {
console.assert(current === molLinesProducts, 'CTab format invalid');
current = null;
} else if (line == 'M V30 BEGIN REACTANT') {
console.assert(current == null, 'CTab format invalid');
current = molLinesReactants;
} else if (line == 'M V30 END REACTANT') {
console.assert(current === molLinesReactants, 'CTab format invalid');
current = null;
} else if (line.startsWith('M V30 BEGIN RGROUP')) {
console.assert(current == null, 'CTab format invalid');
// keep the whole RGROUP block for later and continue after it
j = findRGroupEnd(i);
rGroups.push(ctabLines.slice(i, j + 1));
i = j;
} else if (line == 'M V30 BEGIN CTAB') {
// NOTE(review): a CTAB outside a REACTANT/PRODUCT block leaves `current`
// null and current.push throws a TypeError
j = findCtabEnd(i);
current.push(ctabLines.slice(i, j + 1));
i = j;
} else {
throw new Error('line unrecognized: ' + line);
}
}
var mols = [];
var molLines = molLinesReactants.concat(molLinesProducts);
for (j = 0; j < molLines.length; ++j) {
// the second argument is parseCTabV3000's `norgroups` flag; countsSplit is an
// array and therefore truthy, so the individual molecules do not read R-groups
var mol = parseCTabV3000(molLines[j], countsSplit);
mols.push(mol);
}
// NOTE(review): nAgents is passed as a fourth argument; confirm that
// utils.rxnMerge accepts it
var ctab = utils.rxnMerge(mols, nReactants, nProducts, nAgents);
// flatten the collected RGROUP blocks into one list of lines and read them
// into the merged structure
readRGroups3000(ctab, function (array) {
var res = [];
for (var k = 0; k < array.length; ++k)
res = res.concat(array[k]);
return res;
}(rGroups));
return ctab;
}
/**
 * Splits a line on single spaces that lie outside parentheses and outside
 * double-quoted text. Runs of spaces produce no empty tokens.
 *
 * Parentheses are counted even when they occur inside quotes, and a space only
 * separates tokens while the running parenthesis balance is exactly zero.
 *
 * @param {string} line - text to split
 * @returns {string[]} the tokens, in order of appearance
 */
function spaceparsplit(line) { // eslint-disable-line max-statements
	/* reader */
	var tokens = [];
	var depth = 0;
	var inQuotes = false;
	var start = 0; // index at which the pending token begins
	var pos = 0;
	while (pos < line.length) {
		switch (line[pos]) {
		case '(':
			depth++;
			break;
		case ')':
			depth--;
			break;
		case '"':
			inQuotes = !inQuotes;
			break;
		case ' ':
			if (!inQuotes && depth == 0) {
				if (pos > start)
					tokens.push(line.slice(start, pos));
				start = pos + 1;
			}
			break;
		default:
			break;
		}
		pos++;
	}
	// flush whatever follows the last separator
	if (pos > start)
		tokens.push(line.slice(start, pos));
	return tokens;
}
// utils
/**
 * Removes one pair of enclosing double quotes from a string.
 *
 * @param {string} str - text that may be wrapped in double quotes
 * @returns {string} the text between the quotes when both the first and the
 *   last character are `"`; otherwise `str` unchanged
 */
function stripQuotes(str) {
	var last = str.length - 1;
	var isWrapped = str[0] === '"' && str[last] === '"';
	return isWrapped ? str.slice(1, last) : str;
}
/**
 * Splits a string at the first occurrence of a delimiter.
 *
 * @param {string} line - text to split
 * @param {string} delim - delimiter to look for
 * @returns {string[]} a two-element array `[before, after]`; when the
 *   delimiter does not occur the result is `[line, '']`
 */
function splitonce(line, delim) {
	/* reader */
	var sep = String(delim);
	var p = line.indexOf(sep);
	// indexOf returns -1 when absent; slicing with it would drop the last
	// character of the head and repeat the whole line as the tail
	if (p < 0)
		return [line, ''];
	// skip the whole delimiter, not just its first character
	return [line.slice(0, p), line.slice(p + sep.length)];
}
/**
 * Splits an S-group definition line into space-separated tokens, keeping
 * parenthesised lists and double-quoted text together as single tokens.
 * Parentheses inside quotes are not counted.
 *
 * @param {string} line - the definition text to split
 * @returns {string[]} the tokens in order; the remainder after each split is
 *   trimmed, so runs of spaces between tokens yield no empty tokens
 * @throws {Error} when opening and closing parentheses do not balance
 */
function splitSGroupDef(line) { // eslint-disable-line max-statements
	/* reader */
	var split = [];
	var braceBalance = 0;
	var quoted = false;
	for (var i = 0; i < line.length; ++i) {
		var c = line.charAt(i);
		if (c == '"') {
			quoted = !quoted;
		} else if (!quoted) {
			if (c == '(') {
				braceBalance++;
			} else if (c == ')') {
				braceBalance--;
			} else if (c == ' ' && braceBalance == 0) {
				split.push(line.slice(0, i));
				line = line.slice(i + 1).trim();
				// restart on the remainder; -1 because the loop increment runs
				// next and the first character (possibly `"` or `(`) must be seen
				i = -1;
			}
		}
	}
	if (braceBalance != 0)
		throw new Error('Brace balance broken. S-group properies invalid!');
	if (line.length > 0)
		split.push(line.trim());
	return split;
}
/**
 * Parses a bracketed, space-separated list such as `(3 5 6 7)` into numbers.
 *
 * The first and last characters of the trimmed input are dropped and the rest
 * is split on single spaces. The first token is skipped (presumably the
 * element count - confirm against the format). Every remaining token that
 * parses as a decimal integer is returned with `shift` added; other tokens
 * are ignored.
 *
 * @param {string} line - list text including its enclosing brackets
 * @param {number} [shift=0] - offset added to every parsed value
 * @returns {number[]|null} the shifted values, or `null` when `line` is falsy
 */
function parseBracedNumberList(line, shift) {
	/* reader */
	if (!line)
		return null;
	var offset = shift || 0;
	var tokens = line.trim().slice(1, -1).split(' ');
	var list = [];
	for (var i = 1; i < tokens.length; ++i) {
		// explicit radix: the tokens are decimal, never hex ("0x..")
		var value = parseInt(tokens[i], 10);
		if (!isNaN(value))
			list.push(value + offset);
	}
	return list;
}
/**
 * Removes the V3000 line prefix and returns the rest of the line.
 *
 * CTfile V3000 lines start with `M`, two spaces, `V30` and one space; a
 * single space after `M` is accepted as well. Exactly the matched prefix is
 * removed, so the prefix test and the amount stripped cannot disagree.
 *
 * @param {string} line - a V3000 line including its prefix
 * @returns {string} the line content after the prefix
 * @throws {Error} 'Prefix invalid' when the line does not start with the prefix
 */
function stripV30(line) {
	/* reader */
	var prefix = /^M {1,2}V30 /.exec(line);
	if (!prefix)
		throw new Error('Prefix invalid');
	return line.slice(prefix[0].length);
}
/**
 * Looks up each label in `element.map` and returns the results in order.
 *
 * @param {string[]} labels - labels to look up; each is trimmed first
 * @returns {Array} one `element.map` entry per label (`undefined` for a label
 *   that has no entry)
 */
function labelsListToIds(labels) {
	/* reader */
	return Array.prototype.map.call(labels, function (label) {
		return element.map[label.trim()];
	});
}
// Public API of this module: the V3000 CTAB parser, the RGROUP block reader
// and the V3000 reaction parser. All other functions in the file stay private.
module.exports = {
parseCTabV3000: parseCTabV3000,
readRGroups3000: readRGroups3000,
parseRxn3000: parseRxn3000
};