View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.bonded;
39  
40  import static ffx.numerics.math.DoubleMath.add;
41  import static ffx.numerics.math.DoubleMath.dist;
42  import static ffx.numerics.math.DoubleMath.length;
43  import static ffx.numerics.math.DoubleMath.scale;
44  import static ffx.numerics.math.DoubleMath.sub;
45  import static ffx.potential.bonded.AminoAcidUtils.aminoAcidList;
46  import static ffx.potential.bonded.AminoAcidUtils.assignAminoAcidAtomTypes;
47  import static ffx.potential.bonded.Bond.logNoBondType;
48  import static ffx.potential.bonded.BondedUtils.buildBond;
49  import static ffx.potential.bonded.BondedUtils.buildH;
50  import static ffx.potential.bonded.BondedUtils.buildHeavy;
51  import static ffx.potential.bonded.BondedUtils.findAtomType;
52  import static ffx.potential.bonded.BondedUtils.intxyz;
53  import static ffx.potential.bonded.NamingUtils.checkHydrogenAtomNames;
54  import static ffx.potential.bonded.NucleicAcidUtils.assignNucleicAcidAtomTypes;
55  import static ffx.potential.bonded.NucleicAcidUtils.nucleicAcidList;
56  import static java.lang.Integer.parseInt;
57  import static java.lang.String.format;
58  import static org.apache.commons.math3.util.FastMath.random;
59  
60  import ffx.potential.MolecularAssembly;
61  import ffx.potential.Utilities;
62  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
63  import ffx.potential.bonded.NucleicAcidUtils.NucleicAcid3;
64  import ffx.potential.bonded.Residue.ResidueType;
65  import ffx.potential.parameters.AtomType;
66  import ffx.potential.parameters.BondType;
67  import ffx.potential.parameters.ForceField;
68  import ffx.potential.parsers.PDBFilter.PDBFileStandard;
69  import java.util.ArrayList;
70  import java.util.HashMap;
71  import java.util.List;
72  import java.util.Map;
73  import java.util.logging.Level;
74  import java.util.logging.Logger;
75  import org.apache.commons.configuration2.CompositeConfiguration;
76  
77  /**
78   * Utilities for creating polymers.
79   *
80   * @author Michael Schnieders
81   * @since 1.0
82   */
83  public class PolymerUtils {
84  
85    private static final Logger logger = Logger.getLogger(PolymerUtils.class.getName());
86  
87    private static final double DEFAULT_AA_CHAINBREAK = 3.0;
88    private static final double DEFAULT_NA_CHAINBREAK_MULT = 2.0;
89  
90    /**
91     * Assign force field atoms types to common chemistries using "biotype" records.
92     *
93     * @param molecularAssembly MolecularAssembly to operate on.
94     * @param fileStandard PDB file standard to follow.
95     * @return A List of created bonds.
96     */
97    public static List<Bond> assignAtomTypes(MolecularAssembly molecularAssembly,
98        PDBFileStandard fileStandard) {
99      // Create a list to store bonds defined by PDB atom names.
100     List<Bond> bondList = new ArrayList<>();
101 
102     ForceField forceField = molecularAssembly.getForceField();
103     CompositeConfiguration properties = molecularAssembly.getProperties();
104 
105     // Only rename atoms if requested.
106     boolean standardizeAtomNames = properties.getBoolean("standardizeAtomNames", true);
107 
108     // To Do: Look for cyclic peptides and disulfides.
109     Polymer[] polymers = molecularAssembly.getChains();
110 
111     // Loop over chains.
112     if (polymers != null) {
113       logger.info(format("\n Assigning atom types for %d chains.", polymers.length));
114       for (Polymer polymer : polymers) {
115         List<Residue> residues = polymer.getResidues();
116         boolean isProtein = true;
117         // Check if all residues are known amino acids.
118         for (Residue residue : residues) {
119           String name = residue.getName().toUpperCase();
120           boolean aa = false;
121           for (AminoAcid3 amino : aminoAcidList) {
122             if (amino.toString().equalsIgnoreCase(name)) {
123               aa = true;
124               if (standardizeAtomNames) {
125                 checkHydrogenAtomNames(residue, fileStandard);
126               }
127               break;
128             }
129           }
130           // Check for a patch.
131           if (!aa) {
132             if (logger.isLoggable(Level.FINE)) {
133               logger.fine(" Checking for non-standard amino acid patch " + name);
134             }
135             HashMap<String, AtomType> types = forceField.getAtomTypes(name);
136             if (types.isEmpty()) {
137               isProtein = false;
138               break;
139             } else {
140               if (logger.isLoggable(Level.FINE)) {
141                 logger.fine(" Patch found for non-standard amino acid " + name);
142               }
143             }
144           }
145         }
146 
147         // If all the residues in this chain have known amino acids names, then attempt to assign
148         // atom types.
149         if (isProtein) {
150           try {
151             logger.info(format(" Amino acid chain %s", polymer.getName()));
152             double dist = properties.getDouble("chainbreak", DEFAULT_AA_CHAINBREAK);
153             // Detect main chain breaks!
154             List<List<Residue>> subChains = findChainBreaks(residues, dist);
155             for (List<Residue> subChain : subChains) {
156               assignAminoAcidAtomTypes(subChain, forceField, bondList);
157             }
158           } catch (BondedUtils.MissingHeavyAtomException missingHeavyAtomException) {
159             logger.log(Level.INFO, Utilities.stackTraceToString(missingHeavyAtomException));
160             logger.severe(missingHeavyAtomException.toString());
161           } catch (BondedUtils.MissingAtomTypeException missingAtomTypeException) {
162             logger.log(Level.INFO, Utilities.stackTraceToString(missingAtomTypeException));
163             logger.severe(missingAtomTypeException.toString());
164           }
165           continue;
166         }
167 
168         // Check if all residues have known nucleic acids names.
169         boolean isNucleicAcid = true;
170         for (Residue residue : residues) {
171           String currentName = residue.getName().toUpperCase();
172           // Convert 1 and 2-character nucleic acid names to 3-character names.
173           String name = switch (currentName) {
174             case "A" -> NucleicAcid3.ADE.toString();
175             case "C" -> NucleicAcid3.CYT.toString();
176             case "G" -> NucleicAcid3.GUA.toString();
177             case "T" -> NucleicAcid3.THY.toString();
178             case "U" -> NucleicAcid3.URI.toString();
179             case "YG" -> NucleicAcid3.YYG.toString();
180             case "DA" -> NucleicAcid3.DAD.toString();
181             case "DC" -> NucleicAcid3.DCY.toString();
182             case "DG" -> NucleicAcid3.DGU.toString();
183             case "DT" -> NucleicAcid3.DTY.toString();
184             default -> currentName;
185           };
186           residue.setName(name);
187           NucleicAcid3 nucleicAcid = null;
188           for (NucleicAcid3 nucleic : nucleicAcidList) {
189             String nuc3 = nucleic.toString();
190             nuc3 = nuc3.substring(nuc3.length() - 3);
191             if (nuc3.equalsIgnoreCase(name)) {
192               nucleicAcid = nucleic;
193               break;
194             }
195           }
196           if (nucleicAcid == null) {
197             logger.info(format("Nucleic acid was not recognized %s.", name));
198             isNucleicAcid = false;
199             break;
200           }
201         }
202 
203         /*
204          If all the residues in this chain have known nucleic acids
205          names, then attempt to assign atom types.
206         */
207         if (isNucleicAcid) {
208           try {
209             logger.info(format(" Nucleic acid chain %s", polymer.getName()));
210 
211             if (logger.isLoggable(Level.FINE)) {
212               logger.fine(
213                   format(" EXPERIMENTAL: Finding chain breaks for possible nucleic acid chain %s",
214                       polymer.getName()));
215             }
216             double dist = properties.getDouble("chainbreak", DEFAULT_AA_CHAINBREAK);
217             dist *= DEFAULT_NA_CHAINBREAK_MULT;
218             // Detect main chain breaks!
219             List<List<Residue>> subChains = findChainBreaks(residues, dist);
220 
221             for (List<Residue> subChain : subChains) {
222               assignNucleicAcidAtomTypes(subChain, forceField, bondList);
223             }
224           } catch (BondedUtils.MissingHeavyAtomException | BondedUtils.MissingAtomTypeException e) {
225             logger.log(Level.INFO, Utilities.stackTraceToString(e));
226             logger.severe(e.toString());
227           }
228         }
229       }
230     }
231 
232     // Assign ion atom types.
233     List<MSNode> ions = molecularAssembly.getIons();
234     if (ions != null && !ions.isEmpty()) {
235       logger.info(format(" Assigning atom types for %d ions.", ions.size()));
236       for (MSNode m : ions) {
237         Molecule ion = (Molecule) m;
238         String name = ion.getResidueName().toUpperCase();
239         NamingUtils.HetAtoms hetatm = NamingUtils.HetAtoms.parse(name);
240         Atom atom = ion.getAtomList().get(0);
241         if (ion.getAtomList().size() != 1) {
242           logger.severe(format(" Check residue %s of chain %s.", ion, ion.getChainID()));
243         }
244         try {
245           switch (hetatm) {
246             case NA -> atom.setAtomType(findAtomType(2004, forceField));
247             case K -> atom.setAtomType(findAtomType(2005, forceField));
248             case MG, MG2 -> atom.setAtomType(findAtomType(2008, forceField));
249             case CA, CA2 -> atom.setAtomType(findAtomType(2009, forceField));
250             case ZN, ZN2 -> atom.setAtomType(findAtomType(2016, forceField));
251             case CL -> atom.setAtomType(findAtomType(2013, forceField));
252             case BR -> atom.setAtomType(findAtomType(2012, forceField));
253             default ->
254                 logger.severe(format(" Check residue %s of chain %s.", ion, ion.getChainID()));
255           }
256         } catch (Exception e) {
257           logger.log(Level.INFO, Utilities.stackTraceToString(e));
258           String message = "Error assigning atom types.";
259           logger.log(Level.SEVERE, message, e);
260         }
261       }
262     }
263     // Assign water atom types.
264     List<MSNode> water = molecularAssembly.getWater();
265     if (water != null && !water.isEmpty()) {
266       logger.info(format(" Assigning atom types for %d water.", water.size()));
267       for (MSNode m : water) {
268         Molecule wat = (Molecule) m;
269         try {
270           Atom O = buildHeavy(wat, "O", null, 2001, forceField, bondList);
271           Atom H1 = buildH(wat, "H1", O, 0.96e0, null, 109.5e0, null, 120.0e0, 0, 2002, forceField,
272               bondList);
273           Atom H2 = buildH(wat, "H2", O, 0.96e0, H1, 109.5e0, null, 120.0e0, 0, 2002, forceField,
274               bondList);
275           O.setHetero(true);
276           H1.setHetero(true);
277           H2.setHetero(true);
278         } catch (Exception e) {
279           logger.log(Level.INFO, Utilities.stackTraceToString(e));
280           String message = " Error assigning atom types to a water.";
281           logger.log(Level.SEVERE, message, e);
282         }
283       }
284     }
285 
286     // Assign small molecule atom types.
287     List<MSNode> molecules = molecularAssembly.getMolecules();
288     for (MSNode m : molecules) {
289       Molecule molecule = (Molecule) m;
290       String moleculeName = molecule.getResidueName();
291       logger.info(" Attempting to patch " + moleculeName);
292       List<Atom> moleculeAtoms = molecule.getAtomList();
293       boolean patched = true;
294       HashMap<String, AtomType> types = forceField.getAtomTypes(moleculeName);
295       // Assign atom types for all known atoms.
296       for (Atom atom : moleculeAtoms) {
297         String atomName = atom.getName().toUpperCase();
298         AtomType atomType = types.get(atomName);
299         if (atomType == null) {
300           logger.info(" No atom type was found for " + atomName + " of " + moleculeName + ".");
301           patched = false;
302           break;
303         } else {
304           logger.fine(" " + atom + " -> " + atomType);
305           atom.setAtomType(atomType);
306           types.remove(atomName);
307         }
308       }
309       // Create missing hydrogen atoms. Check for missing heavy atoms.
310       if (patched && !types.isEmpty()) {
311         for (AtomType type : types.values()) {
312           if (type.atomicNumber != 1) {
313             logger.info(" Missing heavy atom " + type.name);
314             patched = false;
315             break;
316           }
317         }
318       }
319       // Create bonds between known atoms.
320       if (patched) {
321         for (Atom atom : moleculeAtoms) {
322           String atomName = atom.getName();
323           String[] bonds = forceField.getBonds(moleculeName, atomName);
324           if (bonds != null) {
325             for (String name : bonds) {
326               Atom atom2 = molecule.getAtom(name);
327               if (atom2 != null && !atom.isBonded(atom2)) {
328                 buildBond(atom, atom2, forceField, bondList);
329               }
330             }
331           }
332         }
333       }
334       // Create missing hydrogen atoms.
335       if (patched && !types.isEmpty()) {
336         // Create a map of the molecule's atoms
337         Map<String, Atom> atomMap = new HashMap<>();
338         for (Atom atom : moleculeAtoms) {
339           atomMap.put(atom.getName().toUpperCase(), atom);
340         }
341         for (String atomName : types.keySet()) {
342           AtomType type = types.get(atomName);
343           String[] bonds = forceField.getBonds(moleculeName, atomName.toUpperCase());
344           if (bonds == null || bonds.length != 1) {
345             patched = false;
346             logger.info(" Check biotype for hydrogen " + type.name + ".");
347             break;
348           }
349           // Get the heavy atom the hydrogen is bonded to.
350           Atom ia = atomMap.get(bonds[0].toUpperCase());
351           Atom hydrogen = new Atom(0, atomName, ia.getAltLoc(), new double[3], ia.getResidueName(),
352               ia.getResidueNumber(), ia.getChainID(), ia.getOccupancy(), ia.getTempFactor(),
353               ia.getSegID());
354           logger.fine(" Created hydrogen " + atomName + ".");
355           hydrogen.setAtomType(type);
356           hydrogen.setHetero(true);
357           molecule.addMSNode(hydrogen);
358           int valence = ia.getAtomType().valence;
359           List<Bond> aBonds = ia.getBonds();
360           int numBonds = aBonds.size();
361           // Try to find the following configuration: ib-ia-ic
362           Atom ib = null;
363           Atom ic = null;
364           Atom id = null;
365           if (numBonds > 0) {
366             Bond bond = aBonds.get(0);
367             ib = bond.get1_2(ia);
368           }
369           if (numBonds > 1) {
370             Bond bond = aBonds.get(1);
371             ic = bond.get1_2(ia);
372           }
373           if (numBonds > 2) {
374             Bond bond = aBonds.get(2);
375             id = bond.get1_2(ia);
376           }
377 
378           // Building the hydrogen depends on hybridization and the locations of other bonded
379           // atoms.
380           logger.fine(
381               " Bonding " + atomName + " to " + ia.getName() + " (" + numBonds + " of " + valence
382                   + ").");
383           switch (valence) {
384             case 4 -> {
385               switch (numBonds) {
386                 case 3 -> {
387                   // Find the average coordinates of atoms ib, ic and id.
388                   double[] b = ib.getXYZ(null);
389                   double[] c = ic.getXYZ(null);
390                   double[] d = id.getXYZ(null);
391                   double[] a = new double[3];
392                   a[0] = (b[0] + c[0] + d[0]) / 3.0;
393                   a[1] = (b[1] + c[1] + d[1]) / 3.0;
394                   a[2] = (b[2] + c[2] + d[2]) / 3.0;
395                   // Place the hydrogen at chiral position #1.
396                   intxyz(hydrogen, ia, 1.0, ib, 109.5, ic, 109.5, 1);
397                   double[] e1 = new double[3];
398                   hydrogen.getXYZ(e1);
399                   double[] ret = new double[3];
400                   sub(a, e1, ret);
401                   double l1 = length(ret);
402                   // Place the hydrogen at chiral position #2.
403                   intxyz(hydrogen, ia, 1.0, ib, 109.5, ic, 109.5, -1);
404                   double[] e2 = new double[3];
405                   hydrogen.getXYZ(e2);
406                   sub(a, e2, ret);
407                   double l2 = length(ret);
408                   // Revert to #1 if it is farther from the average.
409                   if (l1 > l2) {
410                     hydrogen.setXYZ(e1);
411                   }
412                 }
413                 case 2 -> intxyz(hydrogen, ia, 1.0, ib, 109.5, ic, 109.5, 0);
414                 case 1 -> intxyz(hydrogen, ia, 1.0, ib, 109.5, null, 0.0, 0);
415                 case 0 -> intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
416                 default -> {
417                   logger.info(" Check biotype for hydrogen " + atomName + ".");
418                   patched = false;
419                 }
420               }
421             }
422             case 3 -> {
423               switch (numBonds) {
424                 case 2 -> intxyz(hydrogen, ia, 1.0, ib, 120.0, ic, 0.0, 0);
425                 case 1 -> intxyz(hydrogen, ia, 1.0, ib, 120.0, null, 0.0, 0);
426                 case 0 -> intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
427                 default -> {
428                   logger.info(" Check biotype for hydrogen " + atomName + ".");
429                   patched = false;
430                 }
431               }
432             }
433             case 2 -> {
434               switch (numBonds) {
435                 case 1 -> intxyz(hydrogen, ia, 1.0, ib, 120.0, null, 0.0, 0);
436                 case 0 -> intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
437                 default -> {
438                   logger.info(" Check biotype for hydrogen " + atomName + ".");
439                   patched = false;
440                 }
441               }
442             }
443             case 1 -> {
444               if (numBonds == 0) {
445                 intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
446               } else {
447                 logger.info(" Check biotype for hydrogen " + atomName + ".");
448                 patched = false;
449               }
450             }
451             default -> {
452               logger.info(" Check biotype for hydrogen " + atomName + ".");
453               patched = false;
454             }
455           }
456           if (!patched) {
457             break;
458           } else {
459             buildBond(ia, hydrogen, forceField, bondList);
460           }
461         }
462       }
463       if (!patched) {
464         logger.log(Level.INFO, format(" Deleting unrecognized molecule %s.", m));
465         molecularAssembly.deleteMolecule((Molecule) m);
466       } else {
467         logger.info(" Patch for " + moleculeName + " succeeded.");
468       }
469     }
470     resolvePolymerLinks(molecules, molecularAssembly, bondList);
471     return bondList;
472   }
473 
474   /**
475    * Assign parameters to disulfide bonds.
476    *
477    * @param ssBondList List of SSBOND records.
478    * @param molecularAssembly MolecularAssembly to operate on.
479    * @param bondList Add new SS-Bonds to this list.
480    */
481   public static void buildDisulfideBonds(List<Bond> ssBondList, MolecularAssembly molecularAssembly,
482       List<Bond> bondList) {
483     StringBuilder sb = new StringBuilder(" Disulfide Bonds:");
484     ForceField forceField = molecularAssembly.getForceField();
485     for (Bond bond : ssBondList) {
486       Atom a1 = bond.getAtom(0);
487       Atom a2 = bond.getAtom(1);
488       BondType bondType = forceField.getBondType(a1.getAtomType(), a2.getAtomType());
489       if (bondType == null) {
490         logNoBondType(a1, a2, forceField);
491       } else {
492         bond.setBondType(bondType);
493       }
494       double d = dist(a1.getXYZ(null), a2.getXYZ(null));
495       Polymer c1 = molecularAssembly.getChain(a1.getSegID());
496       Polymer c2 = molecularAssembly.getChain(a2.getSegID());
497       Residue r1 = c1.getResidue(a1.getResidueNumber());
498       Residue r2 = c2.getResidue(a2.getResidueNumber());
499       sb.append(format("\n S-S distance of %6.2f for %s and %s.", d, r1.toString(), r2.toString()));
500       bondList.add(bond);
501     }
502     if (!ssBondList.isEmpty()) {
503       logger.info(sb.toString());
504     }
505   }
506 
507   /**
508    * Currently builds missing internal loops based on information in DBREF and SEQRES records.
509    *
510    * <p>Known limitations include: 1) No building n- and c-terminal loops. 2) No support for DBREF1
511    * or DBREF2 records. 3) Incomplete optimization scheme to position the loops.
512    *
513    * @param xyzIndex XYZ index to begin from.
514    * @param molecularAssembly MolecularAssembly to operate on.
515    * @param seqres Map of SEQRES entries.
516    * @param dbref Map of DBREF entries.
517    * @return xyzIndex updated based on built atoms.
518    */
519   public static int buildMissingResidues(int xyzIndex, MolecularAssembly molecularAssembly,
520       Map<Character, String[]> seqres, Map<Character, int[]> dbref) {
521 
522     // Only build loops if the buildLoops flag is true.
523     CompositeConfiguration properties = molecularAssembly.getProperties();
524     if (!properties.getBoolean("buildLoops", false)) {
525       return xyzIndex;
526     }
527 
528     Polymer[] polymers = molecularAssembly.getChains();
529     for (Polymer polymer : polymers) {
530       Character chainID = polymer.getChainID();
531       String[] resNames = seqres.get(chainID);
532       int[] seqRange = dbref.get(chainID);
533       if (resNames == null || seqRange == null) {
534         continue;
535       }
536       int seqBegin = seqRange[0];
537       int seqEnd = seqRange[1];
538       logger.info(
539           format("\n Checking for missing residues in chain %s between residues %d and %d.", polymer,
540               seqBegin, seqEnd));
541 
542       int firstResID = polymer.getFirstResidue().getResidueNumber();
543       for (int i = 0; i < resNames.length; i++) {
544         int currentID = seqBegin + i;
545 
546         Residue currentResidue = polymer.getResidue(currentID);
547         if (currentResidue != null) {
548           continue;
549         }
550 
551         if (currentID <= firstResID) {
552           logger.info(
553               format(" Residue %d is missing, but is at the beginning of the chain.", currentID));
554           continue;
555         }
556 
557         Residue previousResidue = polymer.getResidue(currentID - 1);
558         if (previousResidue == null) {
559           logger.info(
560               format(" Residue %d is missing, but could not be build (previous residue missing).",
561                   currentID));
562           continue;
563         }
564 
565         Residue nextResidue = null;
566         for (int j = currentID + 1; j <= seqEnd; j++) {
567           nextResidue = polymer.getResidue(j);
568           if (nextResidue != null) {
569             break;
570           }
571         }
572         // Residues at the end of the chain are not currently built.
573         if (nextResidue == null) {
574           logger.info(format(" Residue %d is missing, but is at the end of the chain.", currentID));
575           break;
576         }
577         // Find the previous carbonyl carbon and next nitrogen.
578         Atom C = (Atom) previousResidue.getAtomNode("C");
579         Atom N = (Atom) nextResidue.getAtomNode("N");
580         if (C == null || N == null) {
581           logger.info(
582               format(" Residue %d is missing, but bonding atoms are missing (C or N).", currentID));
583           continue;
584         }
585 
586         // Build the missing residue.
587         currentResidue = polymer.getResidue(resNames[i], currentID, true);
588 
589         double[] vector = new double[3];
590         int count = 3 * (nextResidue.getResidueNumber() - previousResidue.getResidueNumber());
591         sub(N.getXYZ(null), C.getXYZ(null), vector);
592         scale(vector, 1.0 / count, vector);
593 
594         double[] nXYZ = new double[3];
595         add(C.getXYZ(null), vector, nXYZ);
596         nXYZ[0] += random() - 0.5;
597         nXYZ[1] += random() - 0.5;
598         nXYZ[2] += random() - 0.5;
599         Atom newN = new Atom(xyzIndex++, "N", C.getAltLoc(), nXYZ, resNames[i], currentID, chainID,
600             1.0, C.getTempFactor(), C.getSegID(), true);
601         currentResidue.addMSNode(newN);
602 
603         double[] caXYZ = new double[3];
604         scale(vector, 2.0, vector);
605         add(C.getXYZ(null), vector, caXYZ);
606         caXYZ[0] += Math.random() - 0.5;
607         caXYZ[1] += Math.random() - 0.5;
608         caXYZ[2] += Math.random() - 0.5;
609         Atom newCA = new Atom(xyzIndex++, "CA", C.getAltLoc(), caXYZ, resNames[i], currentID,
610             chainID, 1.0, C.getTempFactor(), C.getSegID(), true);
611         currentResidue.addMSNode(newCA);
612 
613         double[] cXYZ = new double[3];
614         scale(vector, 1.5, vector);
615         add(C.getXYZ(null), vector, cXYZ);
616         cXYZ[0] += Math.random() - 0.5;
617         cXYZ[1] += Math.random() - 0.5;
618         cXYZ[2] += Math.random() - 0.5;
619         Atom newC = new Atom(xyzIndex++, "C", C.getAltLoc(), cXYZ, resNames[i], currentID, chainID,
620             1.0, C.getTempFactor(), C.getSegID(), true);
621         currentResidue.addMSNode(newC);
622 
623         double[] oXYZ = new double[3];
624         vector[0] = Math.random() - 0.5;
625         vector[1] = Math.random() - 0.5;
626         vector[2] = Math.random() - 0.5;
627         add(cXYZ, vector, oXYZ);
628         Atom newO = new Atom(xyzIndex++, "O", C.getAltLoc(), oXYZ, resNames[i], currentID, chainID,
629             1.0, C.getTempFactor(), C.getSegID(), true);
630         currentResidue.addMSNode(newO);
631         logger.info(format(" Building residue %8s.", currentResidue));
632       }
633     }
634     return xyzIndex;
635   }
636 
637   public static List<List<Residue>> findChainBreaks(List<Residue> residues, double cutoff) {
638     List<List<Residue>> subChains = new ArrayList<>();
639 
640     // Chain-start atom: N (amino)/O5* (nucleic)
641     // Chain-end atom:   C (amino)/O3* (nucleic)
642     ResidueType rType = residues.get(0).getResidueType();
643     String startAtName = null;
644     String endAtName = null;
645     switch (rType) {
646       case AA -> {
647         startAtName = "N";
648         endAtName = "C";
649       }
650       case NA -> {
651         boolean namedStar = residues.stream().flatMap((Residue r) -> r.getAtomList().stream())
652             .anyMatch((Atom a) -> a.getName().equals("O5*"));
653         if (namedStar) {
654           startAtName = "O5*";
655           endAtName = "O3*";
656         } else {
657           startAtName = "O5'";
658           endAtName = "O3'";
659         }
660       }
661       case UNK -> {
662         logger.fine(" Not attempting to find chain breaks for chain with residue " + residues.get(0)
663             .toString());
664         List<List<Residue>> retList = new ArrayList<>();
665         retList.add(residues);
666         return retList;
667       }
668     }
669 
670     List<Residue> subChain = null;
671     Residue previousResidue = null;
672     Atom priorEndAtom = null;
673     StringBuilder sb = new StringBuilder(" Chain Breaks:");
674 
675     for (Residue residue : residues) {
676       List<Atom> resAtoms = residue.getAtomList();
677       if (priorEndAtom == null) {
678         // Initialization.
679         subChain = new ArrayList<>();
680         subChain.add(residue);
681         subChains.add(subChain);
682       } else {
683         // Find the start atom of the current residue.
684         Atom startAtom = null;
685         for (Atom a : resAtoms) {
686           if (a.getName().equalsIgnoreCase(startAtName)) {
687             startAtom = a;
688             break;
689           }
690         }
691         if (startAtom == null) {
692           subChain.add(residue);
693           continue;
694         }
695         // Compute the distance between the previous carbonyl carbon and the current nitrogen.
696         double r = dist(priorEndAtom.getXYZ(null), startAtom.getXYZ(null));
697         if (r > cutoff) {
698           // Start a new chain.
699           subChain = new ArrayList<>();
700           subChain.add(residue);
701           subChains.add(subChain);
702           char ch1 = previousResidue.getChainID();
703           char ch2 = residue.getChainID();
704           sb.append(
705               format("\n C-N distance of %6.2f A for %c-%s and %c-%s.", r, ch1, previousResidue, ch2,
706                   residue));
707         } else {
708           // Continue the current chain.
709           subChain.add(residue);
710         }
711       }
712 
713       // Save the carbonyl carbon.
714       for (Atom a : resAtoms) {
715         if (a.getName().equalsIgnoreCase(endAtName)) {
716           priorEndAtom = a;
717           break;
718         }
719       }
720       previousResidue = residue;
721     }
722 
723     if (subChains.size() > 1) {
724       logger.info(sb.toString());
725     }
726 
727     return subChains;
728   }
729 
730   /**
731    * Locate disulfide bonds based on SSBOND records.
732    *
733    * @param ssbonds List of SSBOND records.
734    * @param molecularAssembly The MolecularAssembly to operate on.
735    * @param pdbToNewResMap Maps chainIDResNumInsCode to renumbered chainIDResNum. For example,
736    *     residue 52A in chain C might be renumbered to residue 53, and mapped as "C52A" to "C53".
737    * @return A List of Bond instances for SS-Bonds.
738    */
739   public static List<Bond> locateDisulfideBonds(List<String> ssbonds,
740       MolecularAssembly molecularAssembly, Map<String, String> pdbToNewResMap) {
741     List<Bond> ssBondList = new ArrayList<>();
742     for (String ssbond : ssbonds) {
743       // =============================================================================
744       // The SSBOND record identifies each disulfide bond in protein and polypeptide
745       // structures by identifying the two residues involved in the bond.
746       // The disulfide bond distance is included after the symmetry operations at
747       // the end of the SSBOND record.
748       //
749       //  8 - 10        Integer         serNum       Serial number.
750       // 12 - 14        LString(3)      "CYS"        Residue name.
751       // 16             Character       chainID1     Chain identifier.
752       // 18 - 21        Integer         seqNum1      Residue sequence number.
753       // 22             AChar           icode1       Insertion code.
754       // 26 - 28        LString(3)      "CYS"        Residue name.
755       // 30             Character       chainID2     Chain identifier.
756       // 32 - 35        Integer         seqNum2      Residue sequence number.
757       // 36             AChar           icode2       Insertion code.
758       // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
759       // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
760       // 74 – 78        Real(5.2)      Length        Disulfide bond distance
761       //
762       // If SG of cysteine is disordered then there are possible alternate linkages.
763       // wwPDB practice is to put together all possible SSBOND records. This is
764       // problematic because the alternate location identifier is not specified in
765       // the SSBOND record.
766       // =============================================================================
767       try {
768         char c1ch = ssbond.charAt(15);
769         char c2ch = ssbond.charAt(29);
770         Polymer c1 = molecularAssembly.getChain(format("%c", c1ch));
771         Polymer c2 = molecularAssembly.getChain(format("%c", c2ch));
772 
773         Polymer[] chains = molecularAssembly.getChains();
774         if (c1 == null) {
775           c1 = chains[0];
776         }
777         if (c2 == null) {
778           c2 = chains[0];
779         }
780 
781         String origResNum1 = ssbond.substring(17, 21).trim();
782         char insChar1 = ssbond.charAt(21);
783         String origResNum2 = ssbond.substring(31, 35).trim();
784         char insChar2 = ssbond.charAt(35);
785 
786         String pdbResNum1 = format("%c%s%c", c1ch, origResNum1, insChar1);
787         String pdbResNum2 = format("%c%s%c", c2ch, origResNum2, insChar2);
788         String resnum1 = pdbToNewResMap.get(pdbResNum1);
789         String resnum2 = pdbToNewResMap.get(pdbResNum2);
790         if (resnum1 == null) {
791           logger.warning(format(" Could not find residue %s for SS-bond %s", pdbResNum1, ssbond));
792           continue;
793         }
794         if (resnum2 == null) {
795           logger.warning(format(" Could not find residue %s for SS-bond %s", pdbResNum2, ssbond));
796           continue;
797         }
798 
799         Residue r1 = c1.getResidue(parseInt(resnum1.substring(1)));
800         Residue r2 = c2.getResidue(parseInt(resnum2.substring(1)));
801         List<Atom> atoms1 = r1.getAtomList();
802         List<Atom> atoms2 = r2.getAtomList();
803         Atom SG1 = null;
804         Atom SG2 = null;
805         for (Atom atom : atoms1) {
806           if (atom.getName().equalsIgnoreCase("SG")) {
807             SG1 = atom;
808             break;
809           }
810         }
811         for (Atom atom : atoms2) {
812           if (atom.getName().equalsIgnoreCase("SG")) {
813             SG2 = atom;
814             break;
815           }
816         }
817         if (SG1 == null) {
818           logger.warning(format(" SG atom 1 of SS-bond %s is null", ssbond));
819         }
820         if (SG2 == null) {
821           logger.warning(format(" SG atom 2 of SS-bond %s is null", ssbond));
822         }
823         if (SG1 == null || SG2 == null) {
824           continue;
825         }
826         double d = dist(SG1.getXYZ(null), SG2.getXYZ(null));
827         if (d < 5.0) {
828           r1.setName("CYX");
829           r2.setName("CYX");
830           for (Atom atom : atoms1) {
831             atom.setResName("CYX");
832           }
833           for (Atom atom : atoms2) {
834             atom.setResName("CYX");
835           }
836           Bond bond = new Bond(SG1, SG2);
837           ssBondList.add(bond);
838         } else {
839           String message = format("Ignoring [%s]\n due to distance %8.3f A.", ssbond, d);
840           logger.log(Level.WARNING, message);
841         }
842       } catch (Exception e) {
843         String message = format("Ignoring [%s]", ssbond);
844         logger.log(Level.WARNING, message, e);
845       }
846     }
847     return ssBondList;
848   }
849 
850   /**
851    * Resolves links between polymeric hetero groups; presently only functional for cyclic molecules.
852    *
853    * @param molecules List of Molecules in the molecular assembly.
854    * @param molecularAssembly MolecularAssembly to operate on.
855    * @param bondList Add created bonds to this list.
856    */
857   public static void resolvePolymerLinks(List<MSNode> molecules, MolecularAssembly molecularAssembly,
858       List<Bond> bondList) {
859 
860     ForceField forceField = molecularAssembly.getForceField();
861     CompositeConfiguration properties = molecularAssembly.getProperties();
862 
863     for (String polyLink : properties.getStringArray("polymerlink")) {
864       logger.info(" Experimental: linking a cyclic hetero group: " + polyLink);
865 
866       // Format: polymerlink resname atom1 atom2 [cyclize]
867       String[] toks = polyLink.split("\\s+");
868       String resName = toks[0];
869       String name1 = toks[1];
870       String name2 = toks[2];
871       int cyclicLen = 0;
872       if (toks.length > 3) {
873         cyclicLen = parseInt(toks[3]);
874       }
875 
876       List<Molecule> matches = new ArrayList<>();
877       for (MSNode node : molecules) {
878         Molecule m = (Molecule) node;
879         if (m.getResidueName().equalsIgnoreCase(resName)) {
880           matches.add(m);
881         }
882       }
883 
884       for (int i = 0; i < matches.size(); i++) {
885         Molecule mi = matches.get(i);
886         int ii = i + 1;
887         if (cyclicLen < 1) {
888           logger.severe(" No current support for polymeric, non-cyclic hetero groups");
889           // Would probably split by chain.
890         } else {
891           Molecule next;
892           if (ii % cyclicLen == 0) {
893             next = matches.get(ii - cyclicLen);
894             logger.info(format(" Cyclizing molecule %s to %s", mi, next));
895           } else {
896             next = matches.get(ii);
897             logger.info(format(" Extending chain from %s to %s.", mi, next));
898           }
899           Atom from = mi.getAtomByName(name1, true);
900           Atom to = next.getAtomByName(name2, true);
901           buildBond(from, to, forceField, bondList);
902         }
903       }
904     }
905   }
906 }