View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.bonded;
39  
40  import ffx.potential.MolecularAssembly;
41  import ffx.potential.Utilities;
42  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
43  import ffx.potential.bonded.NucleicAcidUtils.NucleicAcid3;
44  import ffx.potential.bonded.Residue.ResidueType;
45  import ffx.potential.parameters.AtomType;
46  import ffx.potential.parameters.BondType;
47  import ffx.potential.parameters.ForceField;
48  import ffx.potential.parsers.PDBFilter.PDBFileStandard;
49  import org.apache.commons.configuration2.CompositeConfiguration;
50  
51  import java.util.ArrayList;
52  import java.util.HashMap;
53  import java.util.List;
54  import java.util.Map;
55  import java.util.logging.Level;
56  import java.util.logging.Logger;
57  
58  import static ffx.numerics.math.DoubleMath.add;
59  import static ffx.numerics.math.DoubleMath.dist;
60  import static ffx.numerics.math.DoubleMath.length;
61  import static ffx.numerics.math.DoubleMath.scale;
62  import static ffx.numerics.math.DoubleMath.sub;
63  import static ffx.potential.bonded.AminoAcidUtils.aminoAcidList;
64  import static ffx.potential.bonded.AminoAcidUtils.assignAminoAcidAtomTypes;
65  import static ffx.potential.bonded.Bond.logNoBondType;
66  import static ffx.potential.bonded.BondedUtils.buildBond;
67  import static ffx.potential.bonded.BondedUtils.buildH;
68  import static ffx.potential.bonded.BondedUtils.buildHeavy;
69  import static ffx.potential.bonded.BondedUtils.findAtomType;
70  import static ffx.potential.bonded.BondedUtils.intxyz;
71  import static ffx.potential.bonded.NamingUtils.checkHydrogenAtomNames;
72  import static ffx.potential.bonded.NucleicAcidUtils.assignNucleicAcidAtomTypes;
73  import static ffx.potential.bonded.NucleicAcidUtils.nucleicAcidList;
74  import static java.lang.Integer.parseInt;
75  import static java.lang.String.format;
76  import static org.apache.commons.math3.util.FastMath.random;
77  
78  /**
79   * Utilities for creating polymers.
80   *
81   * @author Michael Schnieders
82   * @since 1.0
83   */
84  public class PolymerUtils {
85  
86    private static final Logger logger = Logger.getLogger(PolymerUtils.class.getName());
87  
88    private static final double DEFAULT_AA_CHAINBREAK = 3.0;
89    private static final double DEFAULT_NA_CHAINBREAK_MULT = 2.0;
90  
91    /**
92     * Assign force field atoms types to common chemistries using "biotype" records.
93     *
94     * @param molecularAssembly MolecularAssembly to operate on.
95     * @param fileStandard PDB file standard to follow.
96     * @return A List of created bonds.
97     */
98    public static List<Bond> assignAtomTypes(MolecularAssembly molecularAssembly,
99        PDBFileStandard fileStandard) {
100     // Create a list to store bonds defined by PDB atom names.
101     List<Bond> bondList = new ArrayList<>();
102 
103     ForceField forceField = molecularAssembly.getForceField();
104     CompositeConfiguration properties = molecularAssembly.getProperties();
105 
106     // Only rename atoms if requested.
107     boolean standardizeAtomNames = properties.getBoolean("standardizeAtomNames", true);
108 
109     // To Do: Look for cyclic peptides and disulfides.
110     Polymer[] polymers = molecularAssembly.getChains();
111 
112     // Loop over chains.
113     if (polymers != null) {
114       logger.info(format("\n Assigning atom types for %d chains.", polymers.length));
115       for (Polymer polymer : polymers) {
116         List<Residue> residues = polymer.getResidues();
117         boolean isProtein = true;
118         // Check if all residues are known amino acids.
119         for (Residue residue : residues) {
120           String name = residue.getName().toUpperCase();
121           boolean aa = false;
122           for (AminoAcid3 amino : aminoAcidList) {
123             if (amino.toString().equalsIgnoreCase(name)) {
124               aa = true;
125               if (standardizeAtomNames) {
126                 checkHydrogenAtomNames(residue, fileStandard);
127               }
128               break;
129             }
130           }
131           // Check for a patch.
132           if (!aa) {
133             if (logger.isLoggable(Level.FINE)) {
134               logger.fine(" Checking for non-standard amino acid patch " + name);
135             }
136             HashMap<String, AtomType> types = forceField.getAtomTypes(name);
137             if (types.isEmpty()) {
138               isProtein = false;
139               break;
140             } else {
141               if (logger.isLoggable(Level.FINE)) {
142                 logger.fine(" Patch found for non-standard amino acid " + name);
143               }
144             }
145           }
146         }
147 
148         // If all the residues in this chain have known amino acids names, then attempt to assign
149         // atom types.
150         if (isProtein) {
151           try {
152             logger.info(format(" Amino acid chain %s", polymer.getName()));
153             double dist = properties.getDouble("chainbreak", DEFAULT_AA_CHAINBREAK);
154             // Detect main chain breaks!
155             List<List<Residue>> subChains = findChainBreaks(residues, dist);
156             for (List<Residue> subChain : subChains) {
157               assignAminoAcidAtomTypes(subChain, forceField, bondList);
158             }
159           } catch (BondedUtils.MissingHeavyAtomException missingHeavyAtomException) {
160             logger.log(Level.INFO, Utilities.stackTraceToString(missingHeavyAtomException));
161             logger.severe(missingHeavyAtomException.toString());
162           } catch (BondedUtils.MissingAtomTypeException missingAtomTypeException) {
163             logger.log(Level.INFO, Utilities.stackTraceToString(missingAtomTypeException));
164             logger.severe(missingAtomTypeException.toString());
165           }
166           continue;
167         }
168 
169         // Check if all residues have known nucleic acids names.
170         boolean isNucleicAcid = true;
171         for (Residue residue : residues) {
172           String currentName = residue.getName().toUpperCase();
173           // Convert 1 and 2-character nucleic acid names to 3-character names.
174           String name = switch (currentName) {
175             case "A" -> NucleicAcid3.ADE.toString();
176             case "C" -> NucleicAcid3.CYT.toString();
177             case "G" -> NucleicAcid3.GUA.toString();
178             case "T" -> NucleicAcid3.THY.toString();
179             case "U" -> NucleicAcid3.URI.toString();
180             case "YG" -> NucleicAcid3.YYG.toString();
181             case "DA" -> NucleicAcid3.DAD.toString();
182             case "DC" -> NucleicAcid3.DCY.toString();
183             case "DG" -> NucleicAcid3.DGU.toString();
184             case "DT" -> NucleicAcid3.DTY.toString();
185             default -> currentName;
186           };
187           residue.setName(name);
188           NucleicAcid3 nucleicAcid = null;
189           for (NucleicAcid3 nucleic : nucleicAcidList) {
190             String nuc3 = nucleic.toString();
191             nuc3 = nuc3.substring(nuc3.length() - 3);
192             if (nuc3.equalsIgnoreCase(name)) {
193               nucleicAcid = nucleic;
194               break;
195             }
196           }
197           if (nucleicAcid == null) {
198             logger.info(format("Nucleic acid was not recognized %s.", name));
199             isNucleicAcid = false;
200             break;
201           }
202         }
203 
204         /*
205          If all the residues in this chain have known nucleic acids
206          names, then attempt to assign atom types.
207         */
208         if (isNucleicAcid) {
209           try {
210             logger.info(format(" Nucleic acid chain %s", polymer.getName()));
211 
212             if (logger.isLoggable(Level.FINE)) {
213               logger.fine(
214                   format(" EXPERIMENTAL: Finding chain breaks for possible nucleic acid chain %s",
215                       polymer.getName()));
216             }
217             double dist = properties.getDouble("chainbreak", DEFAULT_AA_CHAINBREAK);
218             dist *= DEFAULT_NA_CHAINBREAK_MULT;
219             // Detect main chain breaks!
220             List<List<Residue>> subChains = findChainBreaks(residues, dist);
221 
222             for (List<Residue> subChain : subChains) {
223               assignNucleicAcidAtomTypes(subChain, forceField, bondList);
224             }
225           } catch (BondedUtils.MissingHeavyAtomException | BondedUtils.MissingAtomTypeException e) {
226             logger.log(Level.INFO, Utilities.stackTraceToString(e));
227             logger.severe(e.toString());
228           }
229         }
230       }
231     }
232 
233     // Assign ion atom types.
234     List<MSNode> ions = molecularAssembly.getIons();
235     if (ions != null && !ions.isEmpty()) {
236       logger.info(format(" Assigning atom types for %d ions.", ions.size()));
237       for (MSNode m : ions) {
238         Molecule ion = (Molecule) m;
239         String name = ion.getResidueName().toUpperCase();
240         NamingUtils.HetAtoms hetatm = NamingUtils.HetAtoms.parse(name);
241         Atom atom = ion.getAtomList().get(0);
242         if (ion.getAtomList().size() != 1) {
243           logger.severe(format(" Check residue %s of chain %s.", ion, ion.getChainID()));
244         }
245         try {
246           switch (hetatm) {
247             case NA -> atom.setAtomType(findAtomType(2004, forceField));
248             case K -> atom.setAtomType(findAtomType(2005, forceField));
249             case MG, MG2 -> atom.setAtomType(findAtomType(2008, forceField));
250             case CA, CA2 -> atom.setAtomType(findAtomType(2009, forceField));
251             case ZN, ZN2 -> atom.setAtomType(findAtomType(2016, forceField));
252             case CL -> atom.setAtomType(findAtomType(2013, forceField));
253             case BR -> atom.setAtomType(findAtomType(2012, forceField));
254             case I -> atom.setAtomType(findAtomType(2015, forceField));
255             default ->
256                 logger.severe(format(" Check residue %s of chain %s.", ion, ion.getChainID()));
257           }
258         } catch (Exception e) {
259           logger.log(Level.INFO, Utilities.stackTraceToString(e));
260           String message = "Error assigning atom types.";
261           logger.log(Level.SEVERE, message, e);
262         }
263       }
264     }
265     // Assign water atom types.
266     List<MSNode> water = molecularAssembly.getWater();
267     if (water != null && !water.isEmpty()) {
268       logger.info(format(" Assigning atom types for %d water.", water.size()));
269       for (MSNode m : water) {
270         Molecule wat = (Molecule) m;
271         try {
272           Atom O = buildHeavy(wat, "O", null, 2001, forceField, bondList);
273           Atom H1 = buildH(wat, "H1", O, 0.96e0, null, 109.5e0, null, 120.0e0, 0, 2002, forceField,
274               bondList);
275           Atom H2 = buildH(wat, "H2", O, 0.96e0, H1, 109.5e0, null, 120.0e0, 0, 2002, forceField,
276               bondList);
277           O.setHetero(true);
278           H1.setHetero(true);
279           H2.setHetero(true);
280         } catch (Exception e) {
281           logger.log(Level.INFO, Utilities.stackTraceToString(e));
282           String message = " Error assigning atom types to a water.";
283           logger.log(Level.SEVERE, message, e);
284         }
285       }
286     }
287 
288     // Assign small molecule atom types.
289     List<MSNode> molecules = molecularAssembly.getMolecules();
290     for (MSNode m : molecules) {
291       Molecule molecule = (Molecule) m;
292       String moleculeName = molecule.getResidueName();
293       logger.info(" Attempting to patch " + moleculeName);
294       List<Atom> moleculeAtoms = molecule.getAtomList();
295       boolean patched = true;
296       HashMap<String, AtomType> types = forceField.getAtomTypes(moleculeName);
297       // Assign atom types for all known atoms.
298       for (Atom atom : moleculeAtoms) {
299         String atomName = atom.getName().toUpperCase();
300         AtomType atomType = types.get(atomName);
301         if (atomType == null) {
302           logger.info(" No atom type was found for " + atomName + " of " + moleculeName + ".");
303           patched = false;
304           break;
305         } else {
306           logger.fine(" " + atom + " -> " + atomType);
307           atom.setAtomType(atomType);
308           types.remove(atomName);
309         }
310       }
311       // Create missing hydrogen atoms. Check for missing heavy atoms.
312       if (patched && !types.isEmpty()) {
313         for (AtomType type : types.values()) {
314           if (type.atomicNumber != 1) {
315             logger.info(" Missing heavy atom " + type.name);
316             patched = false;
317             break;
318           }
319         }
320       }
321       // Create bonds between known atoms.
322       if (patched) {
323         for (Atom atom : moleculeAtoms) {
324           String atomName = atom.getName();
325           String[] bonds = forceField.getBonds(moleculeName, atomName);
326           if (bonds != null) {
327             for (String name : bonds) {
328               Atom atom2 = molecule.getAtom(name);
329               if (atom2 != null && !atom.isBonded(atom2)) {
330                 buildBond(atom, atom2, forceField, bondList);
331               }
332             }
333           }
334         }
335       }
336       // Create missing hydrogen atoms.
337       if (patched && !types.isEmpty()) {
338         // Create a map of the molecule's atoms
339         Map<String, Atom> atomMap = new HashMap<>();
340         for (Atom atom : moleculeAtoms) {
341           atomMap.put(atom.getName().toUpperCase(), atom);
342         }
343         for (String atomName : types.keySet()) {
344           AtomType type = types.get(atomName);
345           String[] bonds = forceField.getBonds(moleculeName, atomName.toUpperCase());
346           if (bonds == null || bonds.length != 1) {
347             patched = false;
348             logger.info(" Check biotype for hydrogen " + type.name + ".");
349             break;
350           }
351           // Get the heavy atom the hydrogen is bonded to.
352           Atom ia = atomMap.get(bonds[0].toUpperCase());
353           Atom hydrogen = new Atom(0, atomName, ia.getAltLoc(), new double[3], ia.getResidueName(),
354               ia.getResidueNumber(), ia.getChainID(), ia.getOccupancy(), ia.getTempFactor(),
355               ia.getSegID());
356           logger.fine(" Created hydrogen " + atomName + ".");
357           hydrogen.setAtomType(type);
358           hydrogen.setHetero(true);
359           molecule.addMSNode(hydrogen);
360           int valence = ia.getAtomType().valence;
361           List<Bond> aBonds = ia.getBonds();
362           int numBonds = aBonds.size();
363           // Try to find the following configuration: ib-ia-ic
364           Atom ib = null;
365           Atom ic = null;
366           Atom id = null;
367           if (numBonds > 0) {
368             Bond bond = aBonds.get(0);
369             ib = bond.get1_2(ia);
370           }
371           if (numBonds > 1) {
372             Bond bond = aBonds.get(1);
373             ic = bond.get1_2(ia);
374           }
375           if (numBonds > 2) {
376             Bond bond = aBonds.get(2);
377             id = bond.get1_2(ia);
378           }
379 
380           // Building the hydrogen depends on hybridization and the locations of other bonded
381           // atoms.
382           logger.fine(
383               " Bonding " + atomName + " to " + ia.getName() + " (" + numBonds + " of " + valence
384                   + ").");
385           switch (valence) {
386             case 4 -> {
387               switch (numBonds) {
388                 case 3 -> {
389                   // Find the average coordinates of atoms ib, ic and id.
390                   double[] b = ib.getXYZ(null);
391                   double[] c = ic.getXYZ(null);
392                   double[] d = id.getXYZ(null);
393                   double[] a = new double[3];
394                   a[0] = (b[0] + c[0] + d[0]) / 3.0;
395                   a[1] = (b[1] + c[1] + d[1]) / 3.0;
396                   a[2] = (b[2] + c[2] + d[2]) / 3.0;
397                   // Place the hydrogen at chiral position #1.
398                   intxyz(hydrogen, ia, 1.0, ib, 109.5, ic, 109.5, 1);
399                   double[] e1 = new double[3];
400                   hydrogen.getXYZ(e1);
401                   double[] ret = new double[3];
402                   sub(a, e1, ret);
403                   double l1 = length(ret);
404                   // Place the hydrogen at chiral position #2.
405                   intxyz(hydrogen, ia, 1.0, ib, 109.5, ic, 109.5, -1);
406                   double[] e2 = new double[3];
407                   hydrogen.getXYZ(e2);
408                   sub(a, e2, ret);
409                   double l2 = length(ret);
410                   // Revert to #1 if it is farther from the average.
411                   if (l1 > l2) {
412                     hydrogen.setXYZ(e1);
413                   }
414                 }
415                 case 2 -> intxyz(hydrogen, ia, 1.0, ib, 109.5, ic, 109.5, 0);
416                 case 1 -> intxyz(hydrogen, ia, 1.0, ib, 109.5, null, 0.0, 0);
417                 case 0 -> intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
418                 default -> {
419                   logger.info(" Check biotype for hydrogen " + atomName + ".");
420                   patched = false;
421                 }
422               }
423             }
424             case 3 -> {
425               switch (numBonds) {
426                 case 2 -> intxyz(hydrogen, ia, 1.0, ib, 120.0, ic, 0.0, 0);
427                 case 1 -> intxyz(hydrogen, ia, 1.0, ib, 120.0, null, 0.0, 0);
428                 case 0 -> intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
429                 default -> {
430                   logger.info(" Check biotype for hydrogen " + atomName + ".");
431                   patched = false;
432                 }
433               }
434             }
435             case 2 -> {
436               switch (numBonds) {
437                 case 1 -> intxyz(hydrogen, ia, 1.0, ib, 120.0, null, 0.0, 0);
438                 case 0 -> intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
439                 default -> {
440                   logger.info(" Check biotype for hydrogen " + atomName + ".");
441                   patched = false;
442                 }
443               }
444             }
445             case 1 -> {
446               if (numBonds == 0) {
447                 intxyz(hydrogen, ia, 1.0, null, 0.0, null, 0.0, 0);
448               } else {
449                 logger.info(" Check biotype for hydrogen " + atomName + ".");
450                 patched = false;
451               }
452             }
453             default -> {
454               logger.info(" Check biotype for hydrogen " + atomName + ".");
455               patched = false;
456             }
457           }
458           if (!patched) {
459             break;
460           } else {
461             buildBond(ia, hydrogen, forceField, bondList);
462           }
463         }
464       }
465       if (!patched) {
466         logger.log(Level.INFO, format(" Deleting unrecognized molecule %s.", m));
467         molecularAssembly.deleteMolecule((Molecule) m);
468       } else {
469         logger.info(" Patch for " + moleculeName + " succeeded.");
470       }
471     }
472     resolvePolymerLinks(molecules, molecularAssembly, bondList);
473     return bondList;
474   }
475 
476   /**
477    * Assign parameters to disulfide bonds.
478    *
479    * @param ssBondList List of SSBOND records.
480    * @param molecularAssembly MolecularAssembly to operate on.
481    * @param bondList Add new SS-Bonds to this list.
482    */
483   public static void buildDisulfideBonds(List<Bond> ssBondList, MolecularAssembly molecularAssembly,
484       List<Bond> bondList) {
485     StringBuilder sb = new StringBuilder(" Disulfide Bonds:");
486     ForceField forceField = molecularAssembly.getForceField();
487     for (Bond bond : ssBondList) {
488       Atom a1 = bond.getAtom(0);
489       Atom a2 = bond.getAtom(1);
490       BondType bondType = forceField.getBondType(a1.getAtomType(), a2.getAtomType());
491       if (bondType == null) {
492         logNoBondType(a1, a2, forceField);
493       } else {
494         bond.setBondType(bondType);
495       }
496       double d = dist(a1.getXYZ(null), a2.getXYZ(null));
497       Polymer c1 = molecularAssembly.getChain(a1.getSegID());
498       Polymer c2 = molecularAssembly.getChain(a2.getSegID());
499       Residue r1 = c1.getResidue(a1.getResidueNumber());
500       Residue r2 = c2.getResidue(a2.getResidueNumber());
501       sb.append(format("\n S-S distance of %6.2f for %s and %s.", d, r1.toString(), r2.toString()));
502       bondList.add(bond);
503     }
504     if (!ssBondList.isEmpty()) {
505       logger.info(sb.toString());
506     }
507   }
508 
509   /**
510    * Currently builds missing internal loops based on information in DBREF and SEQRES records.
511    *
512    * <p>Known limitations include: 1) No building n- and c-terminal loops. 2) No support for DBREF1
513    * or DBREF2 records. 3) Incomplete optimization scheme to position the loops.
514    *
515    * @param xyzIndex XYZ index to begin from.
516    * @param molecularAssembly MolecularAssembly to operate on.
517    * @param seqres Map of SEQRES entries.
518    * @param dbref Map of DBREF entries.
519    * @return xyzIndex updated based on built atoms.
520    */
521   public static int buildMissingResidues(int xyzIndex, MolecularAssembly molecularAssembly,
522       Map<Character, String[]> seqres, Map<Character, int[]> dbref) {
523 
524     // Only build loops if the buildLoops flag is true.
525     CompositeConfiguration properties = molecularAssembly.getProperties();
526     if (!properties.getBoolean("buildLoops", false)) {
527       return xyzIndex;
528     }
529 
530     Polymer[] polymers = molecularAssembly.getChains();
531     for (Polymer polymer : polymers) {
532       Character chainID = polymer.getChainID();
533       String[] resNames = seqres.get(chainID);
534       int[] seqRange = dbref.get(chainID);
535       if (resNames == null || seqRange == null) {
536         continue;
537       }
538       int seqBegin = seqRange[0];
539       int seqEnd = seqRange[1];
540       logger.info(
541           format("\n Checking for missing residues in chain %s between residues %d and %d.", polymer,
542               seqBegin, seqEnd));
543 
544       int firstResID = polymer.getFirstResidue().getResidueNumber();
545       for (int i = 0; i < resNames.length; i++) {
546         int currentID = seqBegin + i;
547 
548         Residue currentResidue = polymer.getResidue(currentID);
549         if (currentResidue != null) {
550           continue;
551         }
552 
553         if (currentID <= firstResID) {
554           logger.info(
555               format(" Residue %d is missing, but is at the beginning of the chain.", currentID));
556           continue;
557         }
558 
559         Residue previousResidue = polymer.getResidue(currentID - 1);
560         if (previousResidue == null) {
561           logger.info(
562               format(" Residue %d is missing, but could not be build (previous residue missing).",
563                   currentID));
564           continue;
565         }
566 
567         Residue nextResidue = null;
568         for (int j = currentID + 1; j <= seqEnd; j++) {
569           nextResidue = polymer.getResidue(j);
570           if (nextResidue != null) {
571             break;
572           }
573         }
574         // Residues at the end of the chain are not currently built.
575         if (nextResidue == null) {
576           logger.info(format(" Residue %d is missing, but is at the end of the chain.", currentID));
577           break;
578         }
579         // Find the previous carbonyl carbon and next nitrogen.
580         Atom C = (Atom) previousResidue.getAtomNode("C");
581         Atom N = (Atom) nextResidue.getAtomNode("N");
582         if (C == null || N == null) {
583           logger.info(
584               format(" Residue %d is missing, but bonding atoms are missing (C or N).", currentID));
585           continue;
586         }
587 
588         // Build the missing residue.
589         currentResidue = polymer.getResidue(resNames[i], currentID, true);
590 
591         double[] vector = new double[3];
592         int count = 3 * (nextResidue.getResidueNumber() - previousResidue.getResidueNumber());
593         sub(N.getXYZ(null), C.getXYZ(null), vector);
594         scale(vector, 1.0 / count, vector);
595 
596         double[] nXYZ = new double[3];
597         add(C.getXYZ(null), vector, nXYZ);
598         nXYZ[0] += random() - 0.5;
599         nXYZ[1] += random() - 0.5;
600         nXYZ[2] += random() - 0.5;
601         Atom newN = new Atom(xyzIndex++, "N", C.getAltLoc(), nXYZ, resNames[i], currentID, chainID,
602             1.0, C.getTempFactor(), C.getSegID(), true);
603         currentResidue.addMSNode(newN);
604 
605         double[] caXYZ = new double[3];
606         scale(vector, 2.0, vector);
607         add(C.getXYZ(null), vector, caXYZ);
608         caXYZ[0] += Math.random() - 0.5;
609         caXYZ[1] += Math.random() - 0.5;
610         caXYZ[2] += Math.random() - 0.5;
611         Atom newCA = new Atom(xyzIndex++, "CA", C.getAltLoc(), caXYZ, resNames[i], currentID,
612             chainID, 1.0, C.getTempFactor(), C.getSegID(), true);
613         currentResidue.addMSNode(newCA);
614 
615         double[] cXYZ = new double[3];
616         scale(vector, 1.5, vector);
617         add(C.getXYZ(null), vector, cXYZ);
618         cXYZ[0] += Math.random() - 0.5;
619         cXYZ[1] += Math.random() - 0.5;
620         cXYZ[2] += Math.random() - 0.5;
621         Atom newC = new Atom(xyzIndex++, "C", C.getAltLoc(), cXYZ, resNames[i], currentID, chainID,
622             1.0, C.getTempFactor(), C.getSegID(), true);
623         currentResidue.addMSNode(newC);
624 
625         double[] oXYZ = new double[3];
626         vector[0] = Math.random() - 0.5;
627         vector[1] = Math.random() - 0.5;
628         vector[2] = Math.random() - 0.5;
629         add(cXYZ, vector, oXYZ);
630         Atom newO = new Atom(xyzIndex++, "O", C.getAltLoc(), oXYZ, resNames[i], currentID, chainID,
631             1.0, C.getTempFactor(), C.getSegID(), true);
632         currentResidue.addMSNode(newO);
633         logger.info(format(" Building residue %8s.", currentResidue));
634       }
635     }
636     return xyzIndex;
637   }
638 
639   public static List<List<Residue>> findChainBreaks(List<Residue> residues, double cutoff) {
640     List<List<Residue>> subChains = new ArrayList<>();
641 
642     // Chain-start atom: N (amino)/O5* (nucleic)
643     // Chain-end atom:   C (amino)/O3* (nucleic)
644     ResidueType rType = residues.get(0).getResidueType();
645     String startAtName = null;
646     String endAtName = null;
647     switch (rType) {
648       case AA -> {
649         startAtName = "N";
650         endAtName = "C";
651       }
652       case NA -> {
653         boolean namedStar = residues.stream().flatMap((Residue r) -> r.getAtomList().stream())
654             .anyMatch((Atom a) -> a.getName().equals("O5*"));
655         if (namedStar) {
656           startAtName = "O5*";
657           endAtName = "O3*";
658         } else {
659           startAtName = "O5'";
660           endAtName = "O3'";
661         }
662       }
663       case UNK -> {
664         logger.fine(" Not attempting to find chain breaks for chain with residue " + residues.get(0)
665             .toString());
666         List<List<Residue>> retList = new ArrayList<>();
667         retList.add(residues);
668         return retList;
669       }
670     }
671 
672     List<Residue> subChain = null;
673     Residue previousResidue = null;
674     Atom priorEndAtom = null;
675     StringBuilder sb = new StringBuilder(" Chain Breaks:");
676 
677     for (Residue residue : residues) {
678       List<Atom> resAtoms = residue.getAtomList();
679       if (priorEndAtom == null) {
680         // Initialization.
681         subChain = new ArrayList<>();
682         subChain.add(residue);
683         subChains.add(subChain);
684       } else {
685         // Find the start atom of the current residue.
686         Atom startAtom = null;
687         for (Atom a : resAtoms) {
688           if (a.getName().equalsIgnoreCase(startAtName)) {
689             startAtom = a;
690             break;
691           }
692         }
693         if (startAtom == null) {
694           subChain.add(residue);
695           continue;
696         }
697         // Compute the distance between the previous carbonyl carbon and the current nitrogen.
698         double r = dist(priorEndAtom.getXYZ(null), startAtom.getXYZ(null));
699         if (r > cutoff) {
700           // Start a new chain.
701           subChain = new ArrayList<>();
702           subChain.add(residue);
703           subChains.add(subChain);
704           char ch1 = previousResidue.getChainID();
705           char ch2 = residue.getChainID();
706           sb.append(
707               format("\n C-N distance of %6.2f A for %c-%s and %c-%s.", r, ch1, previousResidue, ch2,
708                   residue));
709         } else {
710           // Continue the current chain.
711           subChain.add(residue);
712         }
713       }
714 
715       // Save the carbonyl carbon.
716       for (Atom a : resAtoms) {
717         if (a.getName().equalsIgnoreCase(endAtName)) {
718           priorEndAtom = a;
719           break;
720         }
721       }
722       previousResidue = residue;
723     }
724 
725     if (subChains.size() > 1) {
726       logger.info(sb.toString());
727     }
728 
729     return subChains;
730   }
731 
732   /**
733    * Locate disulfide bonds based on SSBOND records.
734    *
735    * @param ssbonds List of SSBOND records.
736    * @param molecularAssembly The MolecularAssembly to operate on.
737    * @param pdbToNewResMap Maps chainIDResNumInsCode to renumbered chainIDResNum. For example,
738    *     residue 52A in chain C might be renumbered to residue 53, and mapped as "C52A" to "C53".
739    * @return A List of Bond instances for SS-Bonds.
740    */
741   public static List<Bond> locateDisulfideBonds(List<String> ssbonds,
742       MolecularAssembly molecularAssembly, Map<String, String> pdbToNewResMap) {
743     List<Bond> ssBondList = new ArrayList<>();
744     for (String ssbond : ssbonds) {
745       // =============================================================================
746       // The SSBOND record identifies each disulfide bond in protein and polypeptide
747       // structures by identifying the two residues involved in the bond.
748       // The disulfide bond distance is included after the symmetry operations at
749       // the end of the SSBOND record.
750       //
751       //  8 - 10        Integer         serNum       Serial number.
752       // 12 - 14        LString(3)      "CYS"        Residue name.
753       // 16             Character       chainID1     Chain identifier.
754       // 18 - 21        Integer         seqNum1      Residue sequence number.
755       // 22             AChar           icode1       Insertion code.
756       // 26 - 28        LString(3)      "CYS"        Residue name.
757       // 30             Character       chainID2     Chain identifier.
758       // 32 - 35        Integer         seqNum2      Residue sequence number.
759       // 36             AChar           icode2       Insertion code.
760       // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
761       // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
762       // 74 – 78        Real(5.2)      Length        Disulfide bond distance
763       //
764       // If SG of cysteine is disordered then there are possible alternate linkages.
765       // wwPDB practice is to put together all possible SSBOND records. This is
766       // problematic because the alternate location identifier is not specified in
767       // the SSBOND record.
768       // =============================================================================
769       try {
770         char c1ch = ssbond.charAt(15);
771         char c2ch = ssbond.charAt(29);
772         Polymer c1 = molecularAssembly.getChain(format("%c", c1ch));
773         Polymer c2 = molecularAssembly.getChain(format("%c", c2ch));
774 
775         Polymer[] chains = molecularAssembly.getChains();
776         if (c1 == null) {
777           c1 = chains[0];
778         }
779         if (c2 == null) {
780           c2 = chains[0];
781         }
782 
783         String origResNum1 = ssbond.substring(17, 21).trim();
784         char insChar1 = ssbond.charAt(21);
785         String origResNum2 = ssbond.substring(31, 35).trim();
786         char insChar2 = ssbond.charAt(35);
787 
788         String pdbResNum1 = format("%c%s%c", c1ch, origResNum1, insChar1);
789         String pdbResNum2 = format("%c%s%c", c2ch, origResNum2, insChar2);
790         String resnum1 = pdbToNewResMap.get(pdbResNum1);
791         String resnum2 = pdbToNewResMap.get(pdbResNum2);
792         if (resnum1 == null) {
793           logger.warning(format(" Could not find residue %s for SS-bond %s", pdbResNum1, ssbond));
794           continue;
795         }
796         if (resnum2 == null) {
797           logger.warning(format(" Could not find residue %s for SS-bond %s", pdbResNum2, ssbond));
798           continue;
799         }
800 
801         Residue r1 = c1.getResidue(parseInt(resnum1.substring(1)));
802         Residue r2 = c2.getResidue(parseInt(resnum2.substring(1)));
803         List<Atom> atoms1 = r1.getAtomList();
804         List<Atom> atoms2 = r2.getAtomList();
805         Atom SG1 = null;
806         Atom SG2 = null;
807         for (Atom atom : atoms1) {
808           if (atom.getName().equalsIgnoreCase("SG")) {
809             SG1 = atom;
810             break;
811           }
812         }
813         for (Atom atom : atoms2) {
814           if (atom.getName().equalsIgnoreCase("SG")) {
815             SG2 = atom;
816             break;
817           }
818         }
819         if (SG1 == null) {
820           logger.warning(format(" SG atom 1 of SS-bond %s is null", ssbond));
821         }
822         if (SG2 == null) {
823           logger.warning(format(" SG atom 2 of SS-bond %s is null", ssbond));
824         }
825         if (SG1 == null || SG2 == null) {
826           continue;
827         }
828         double d = dist(SG1.getXYZ(null), SG2.getXYZ(null));
829         if (d < 5.0) {
830           r1.setName("CYX");
831           r2.setName("CYX");
832           for (Atom atom : atoms1) {
833             atom.setResName("CYX");
834           }
835           for (Atom atom : atoms2) {
836             atom.setResName("CYX");
837           }
838           Bond bond = new Bond(SG1, SG2);
839           ssBondList.add(bond);
840         } else {
841           String message = format("Ignoring [%s]\n due to distance %8.3f A.", ssbond, d);
842           logger.log(Level.WARNING, message);
843         }
844       } catch (Exception e) {
845         String message = format("Ignoring [%s]", ssbond);
846         logger.log(Level.WARNING, message, e);
847       }
848     }
849     return ssBondList;
850   }
851 
852   /**
853    * Resolves links between polymeric hetero groups; presently only functional for cyclic molecules.
854    *
855    * @param molecules List of Molecules in the molecular assembly.
856    * @param molecularAssembly MolecularAssembly to operate on.
857    * @param bondList Add created bonds to this list.
858    */
859   public static void resolvePolymerLinks(List<MSNode> molecules, MolecularAssembly molecularAssembly,
860       List<Bond> bondList) {
861 
862     ForceField forceField = molecularAssembly.getForceField();
863     CompositeConfiguration properties = molecularAssembly.getProperties();
864 
865     for (String polyLink : properties.getStringArray("polymerlink")) {
866       logger.info(" Experimental: linking a cyclic hetero group: " + polyLink);
867 
868       // Format: polymerlink resname atom1 atom2 [cyclize]
869       String[] toks = polyLink.split("\\s+");
870       String resName = toks[0];
871       String name1 = toks[1];
872       String name2 = toks[2];
873       int cyclicLen = 0;
874       if (toks.length > 3) {
875         cyclicLen = parseInt(toks[3]);
876       }
877 
878       List<Molecule> matches = new ArrayList<>();
879       for (MSNode node : molecules) {
880         Molecule m = (Molecule) node;
881         if (m.getResidueName().equalsIgnoreCase(resName)) {
882           matches.add(m);
883         }
884       }
885 
886       for (int i = 0; i < matches.size(); i++) {
887         Molecule mi = matches.get(i);
888         int ii = i + 1;
889         if (cyclicLen < 1) {
890           logger.severe(" No current support for polymeric, non-cyclic hetero groups");
891           // Would probably split by chain.
892         } else {
893           Molecule next;
894           if (ii % cyclicLen == 0) {
895             next = matches.get(ii - cyclicLen);
896             logger.info(format(" Cyclizing molecule %s to %s", mi, next));
897           } else {
898             next = matches.get(ii);
899             logger.info(format(" Extending chain from %s to %s.", mi, next));
900           }
901           Atom from = mi.getAtomByName(name1, true);
902           Atom to = next.getAtomByName(name2, true);
903           buildBond(from, to, forceField, bondList);
904         }
905       }
906     }
907   }
908 }