Files
enviPy-bayer/static/js/ketcher2/script/chem/molfile/v2000.js
2025-06-23 20:13:54 +02:00

427 lines
14 KiB
JavaScript

/****************************************************************************
* Copyright 2017 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
var Vec2 = require('../../util/vec2');
var Map = require('../../util/map');
var element = require('./../element');
var Struct = require('./../struct/index');
var sGroup = require('./parseSGroup');
var utils = require('./utils');
var loadRGroupFragments = true; // TODO: set to load the fragments
function parseAtomLine(atomLine) {
/* reader */
var atomSplit = utils.partitionLine(atomLine, utils.fmtInfo.atomLinePartition);
var params =
{
// generic
pp: new Vec2(parseFloat(atomSplit[0]), -parseFloat(atomSplit[1]), parseFloat(atomSplit[2])),
label: atomSplit[4].trim(),
explicitValence: utils.fmtInfo.valenceMap[utils.parseDecimalInt(atomSplit[10])],
// obsolete
massDifference: utils.parseDecimalInt(atomSplit[5]),
charge: utils.fmtInfo.chargeMap[utils.parseDecimalInt(atomSplit[6])],
// query
hCount: utils.parseDecimalInt(utils.parseDecimalInt(atomSplit[8])),
stereoCare: utils.parseDecimalInt(atomSplit[9]) != 0,
// reaction
aam: utils.parseDecimalInt(atomSplit[14]),
invRet: utils.parseDecimalInt(atomSplit[15]),
// reaction query
exactChangeFlag: utils.parseDecimalInt(atomSplit[16]) != 0
};
return new Struct.Atom(params);
}
function parseBondLine(bondLine) {
/* reader */
var bondSplit = utils.partitionLine(bondLine, utils.fmtInfo.bondLinePartition);
var params =
{
begin: utils.parseDecimalInt(bondSplit[0]) - 1,
end: utils.parseDecimalInt(bondSplit[1]) - 1,
type: utils.fmtInfo.bondTypeMap[utils.parseDecimalInt(bondSplit[2])],
stereo: utils.fmtInfo.bondStereoMap[utils.parseDecimalInt(bondSplit[3])],
xxx: bondSplit[4],
topology: utils.fmtInfo.bondTopologyMap[utils.parseDecimalInt(bondSplit[5])],
reactingCenterStatus: utils.parseDecimalInt(bondSplit[6])
};
return new Struct.Bond(params);
}
function parseAtomListLine(/* string */atomListLine) {
/* reader */
var split = utils.partitionLine(atomListLine, utils.fmtInfo.atomListHeaderPartition);
var number = utils.parseDecimalInt(split[0]) - 1;
var notList = (split[2].trim() == 'T');
var count = utils.parseDecimalInt(split[4].trim());
var ids = atomListLine.slice(utils.fmtInfo.atomListHeaderLength);
var list = [];
var itemLength = utils.fmtInfo.atomListHeaderItemLength;
for (var i = 0; i < count; ++i)
list[i] = utils.parseDecimalInt(ids.slice(i * itemLength, ((i + 1) * itemLength) - 1));
return {
aid: number,
atomList: new Struct.AtomList({
notList: notList,
ids: list
})
};
}
function parsePropertyLines(ctab, ctabLines, shift, end, sGroups, rLogic) { // eslint-disable-line max-statements, max-params
/* reader */
var props = new Map();
while (shift < end) {
var line = ctabLines[shift];
if (line.charAt(0) == 'A') {
var propValue = ctabLines[++shift];
var isPseudo = /'.+'/.test(propValue);
if (isPseudo && !props.get('pseudo'))
props.set('pseudo', new Map());
if (!isPseudo && !props.get('alias'))
props.set('alias', new Map());
if (isPseudo) propValue = propValue.replace(/'/g, '');
props.get(isPseudo ? 'pseudo' : 'alias').set(utils.parseDecimalInt(line.slice(3, 6)) - 1, propValue);
} else if (line.charAt(0) == 'M') {
var type = line.slice(3, 6);
var propertyData = line.slice(6);
if (type == 'END') {
break;
} else if (type == 'CHG') {
if (!props.get('charge'))
props.set('charge', new Map());
props.get('charge').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'RAD') {
if (!props.get('radical'))
props.set('radical', new Map());
props.get('radical').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'ISO') {
if (!props.get('isotope'))
props.set('isotope', new Map());
props.get('isotope').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'RBC') {
if (!props.get('ringBondCount'))
props.set('ringBondCount', new Map());
props.get('ringBondCount').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'SUB') {
if (!props.get('substitutionCount'))
props.set('substitutionCount', new Map());
props.get('substitutionCount').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'UNS') {
if (!props.get('unsaturatedAtom'))
props.set('unsaturatedAtom', new Map());
props.get('unsaturatedAtom').update(sGroup.readKeyValuePairs(propertyData));
// else if (type == "LIN") // link atom
} else if (type == 'RGP') { // rgroup atom
if (!props.get('rglabel'))
props.set('rglabel', new Map());
var rglabels = props.get('rglabel');
var a2rs = sGroup.readKeyMultiValuePairs(propertyData);
for (var a2ri = 0; a2ri < a2rs.length; a2ri++) {
var a2r = a2rs[a2ri];
rglabels.set(a2r[0], (rglabels.get(a2r[0]) || 0) | (1 << (a2r[1] - 1)));
}
} else if (type == 'LOG') { // rgroup atom
propertyData = propertyData.slice(4);
var rgid = utils.parseDecimalInt(propertyData.slice(0, 3).trim());
var iii = utils.parseDecimalInt(propertyData.slice(4, 7).trim());
var hhh = utils.parseDecimalInt(propertyData.slice(8, 11).trim());
var ooo = propertyData.slice(12).trim();
var logic = {};
if (iii > 0)
logic.ifthen = iii;
logic.resth = hhh == 1;
logic.range = ooo;
rLogic[rgid] = logic;
} else if (type == 'APO') {
if (!props.get('attpnt'))
props.set('attpnt', new Map());
props.get('attpnt').update(sGroup.readKeyValuePairs(propertyData));
} else if (type == 'ALS') { // atom list
if (!props.get('atomList'))
props.set('atomList', new Map());
var list = parsePropertyLineAtomList(
utils.partitionLine(propertyData, [1, 3, 3, 1, 1, 1]),
utils.partitionLineFixed(propertyData.slice(10), 4, false));
props.get('atomList').update(
list);
if (!props.get('label'))
props.set('label', new Map());
for (var aid in list) props.get('label').set(aid, 'L#');
} else if (type == 'STY') { // introduce s-group
sGroup.initSGroup(sGroups, propertyData);
} else if (type == 'SST') {
sGroup.applySGroupProp(sGroups, 'subtype', propertyData);
} else if (type == 'SLB') {
sGroup.applySGroupProp(sGroups, 'label', propertyData, true);
} else if (type == 'SPL') {
sGroup.applySGroupProp(sGroups, 'parent', propertyData, true, true);
} else if (type == 'SCN') {
sGroup.applySGroupProp(sGroups, 'connectivity', propertyData);
} else if (type == 'SAL') {
sGroup.applySGroupArrayProp(sGroups, 'atoms', propertyData, -1);
} else if (type == 'SBL') {
sGroup.applySGroupArrayProp(sGroups, 'bonds', propertyData, -1);
} else if (type == 'SPA') {
sGroup.applySGroupArrayProp(sGroups, 'patoms', propertyData, -1);
} else if (type == 'SMT') {
var sid = utils.parseDecimalInt(propertyData.slice(0, 4)) - 1;
sGroups[sid].data.subscript = propertyData.slice(4).trim();
} else if (type == 'SDT') {
sGroup.applyDataSGroupDesc(sGroups, propertyData);
} else if (type == 'SDD') {
sGroup.applyDataSGroupInfoLine(sGroups, propertyData);
} else if (type == 'SCD') {
sGroup.applyDataSGroupDataLine(sGroups, propertyData, false);
} else if (type == 'SED') {
sGroup.applyDataSGroupDataLine(sGroups, propertyData, true);
}
}
++shift;
}
return props;
}
function applyAtomProp(atoms /* Pool */, values /* Map */, propId /* string */) {
/* reader */
values.each(function (aid, propVal) {
atoms.get(aid)[propId] = propVal;
});
}
function parseCTabV2000(ctabLines, countsSplit) { // eslint-disable-line max-statements
/* reader */
var ctab = new Struct();
var i;
var atomCount = utils.parseDecimalInt(countsSplit[0]);
var bondCount = utils.parseDecimalInt(countsSplit[1]);
var atomListCount = utils.parseDecimalInt(countsSplit[2]);
ctab.isChiral = utils.parseDecimalInt(countsSplit[4]) != 0;
var stextLinesCount = utils.parseDecimalInt(countsSplit[5]);
var propertyLinesCount = utils.parseDecimalInt(countsSplit[10]);
var shift = 0;
var atomLines = ctabLines.slice(shift, shift + atomCount);
shift += atomCount;
var bondLines = ctabLines.slice(shift, shift + bondCount);
shift += bondCount;
var atomListLines = ctabLines.slice(shift, shift + atomListCount);
shift += atomListCount + stextLinesCount;
var atoms = atomLines.map(parseAtomLine);
for (i = 0; i < atoms.length; ++i)
ctab.atoms.add(atoms[i]);
var bonds = bondLines.map(parseBondLine);
for (i = 0; i < bonds.length; ++i)
ctab.bonds.add(bonds[i]);
var atomLists = atomListLines.map(parseAtomListLine);
atomLists.forEach(function (pair) {
ctab.atoms.get(pair.aid).atomList = pair.atomList;
ctab.atoms.get(pair.aid).label = 'L#';
});
var sGroups = {};
var rLogic = {};
var props = parsePropertyLines(ctab, ctabLines, shift,
Math.min(ctabLines.length, shift + propertyLinesCount), sGroups, rLogic);
props.each(function (propId, values) {
applyAtomProp(ctab.atoms, values, propId);
});
var atomMap = {};
var sid;
for (sid in sGroups) {
var sg = sGroups[sid];
if (sg.type === 'DAT' && sg.atoms.length === 0) {
var parent = sGroups[sid].parent;
if (parent >= 0) {
var psg = sGroups[parent - 1];
if (psg.type === 'GEN')
sg.atoms = [].slice.call(psg.atoms);
}
}
}
for (sid in sGroups)
sGroup.loadSGroup(ctab, sGroups[sid], atomMap);
var emptyGroups = [];
for (sid in sGroups) { // TODO: why do we need that?
Struct.SGroup.filter(ctab, sGroups[sid], atomMap);
if (sGroups[sid].atoms.length == 0 && !sGroups[sid].allAtoms)
emptyGroups.push(sid);
}
for (i = 0; i < emptyGroups.length; ++i) {
ctab.sGroupForest.remove(emptyGroups[i]);
ctab.sgroups.remove(emptyGroups[i]);
}
for (var rgid in rLogic)
ctab.rgroups.set(rgid, new Struct.RGroup(rLogic[rgid]));
return ctab;
}
function parseRg2000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
ctabLines = ctabLines.slice(7);
if (ctabLines[0].trim() != '$CTAB')
throw new Error('RGFile format invalid');
var i = 1;
while (ctabLines[i].charAt(0) != '$') i++;
if (ctabLines[i].trim() != '$END CTAB')
throw new Error('RGFile format invalid');
var coreLines = ctabLines.slice(1, i);
ctabLines = ctabLines.slice(i + 1);
var fragmentLines = {};
while (true) { // eslint-disable-line no-constant-condition
if (ctabLines.length == 0)
throw new Error('Unexpected end of file');
var line = ctabLines[0].trim();
if (line == '$END MOL') {
ctabLines = ctabLines.slice(1);
break;
}
if (line != '$RGP')
throw new Error('RGFile format invalid');
var rgid = ctabLines[1].trim() - 0;
fragmentLines[rgid] = [];
ctabLines = ctabLines.slice(2);
while (true) { // eslint-disable-line no-constant-condition
if (ctabLines.length == 0)
throw new Error('Unexpected end of file');
line = ctabLines[0].trim();
if (line == '$END RGP') {
ctabLines = ctabLines.slice(1);
break;
}
if (line != '$CTAB')
throw new Error('RGFile format invalid');
i = 1;
while (ctabLines[i].charAt(0) != '$') i++;
if (ctabLines[i].trim() != '$END CTAB')
throw new Error('RGFile format invalid');
fragmentLines[rgid].push(ctabLines.slice(1, i));
ctabLines = ctabLines.slice(i + 1);
}
}
var core = parseCTab(coreLines);
var frag = {};
if (loadRGroupFragments) {
for (var id in fragmentLines) {
frag[id] = [];
for (var j = 0; j < fragmentLines[id].length; ++j)
frag[id].push(parseCTab(fragmentLines[id][j]));
}
}
return rgMerge(core, frag);
}
function parseRxn2000(/* string[] */ ctabLines) /* Struct */ { // eslint-disable-line max-statements
/* reader */
ctabLines = ctabLines.slice(4);
var countsSplit = utils.partitionLine(ctabLines[0], utils.fmtInfo.rxnItemsPartition);
var nReactants = countsSplit[0] - 0,
nProducts = countsSplit[1] - 0,
nAgents = countsSplit[2] - 0;
ctabLines = ctabLines.slice(1); // consume counts line
var mols = [];
while (ctabLines.length > 0 && ctabLines[0].substr(0, 4) == '$MOL') {
ctabLines = ctabLines.slice(1);
var n = 0;
while (n < ctabLines.length && ctabLines[n].substr(0, 4) != '$MOL') n++;
var lines = ctabLines.slice(0, n);
var struct;
if (lines[0].search('\\$MDL') == 0) {
struct = parseRg2000(lines);
} else {
struct = parseCTab(lines.slice(3));
struct.name = lines[0].trim();
}
mols.push(struct);
ctabLines = ctabLines.slice(n);
}
return utils.rxnMerge(mols, nReactants, nProducts, nAgents);
}
function parseCTab(/* string */ ctabLines) /* Struct */ {
/* reader */
var countsSplit = utils.partitionLine(ctabLines[0], utils.fmtInfo.countsLinePartition);
ctabLines = ctabLines.slice(1);
return parseCTabV2000(ctabLines, countsSplit);
}
function rgMerge(scaffold, rgroups) /* Struct */ {
/* reader */
var ret = new Struct();
scaffold.mergeInto(ret, null, null, false, true);
for (var rgid in rgroups) {
for (var j = 0; j < rgroups[rgid].length; ++j) {
var ctab = rgroups[rgid][j];
ctab.rgroups.set(rgid, new Struct.RGroup());
var frag = {};
var frid = ctab.frags.add(frag);
ctab.rgroups.get(rgid).frags.add(frid);
ctab.atoms.each(function (aid, atom) {
atom.fragment = frid;
});
ctab.mergeInto(ret);
}
}
return ret;
}
function labelsListToIds(labels) {
/* reader */
var ids = [];
for (var i = 0; i < labels.length; ++i)
ids.push(element.map[labels[i].trim()]);
return ids;
}
function parsePropertyLineAtomList(hdr, lst) {
/* reader */
var aid = utils.parseDecimalInt(hdr[1]) - 1;
var count = utils.parseDecimalInt(hdr[2]);
var notList = hdr[4].trim() == 'T';
var ids = labelsListToIds(lst.slice(0, count));
var ret = {};
ret[aid] = new Struct.AtomList({
notList: notList,
ids: ids
});
return ret;
}
module.exports = {
parseCTabV2000: parseCTabV2000,
parseRg2000: parseRg2000,
parseRxn2000: parseRxn2000
};