View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential;
39  
40  import static ffx.numerics.math.DoubleMath.sub;
41  import static ffx.potential.bonded.NamingUtils.renameAminoAcidToPDBStandard;
42  import static ffx.potential.bonded.NamingUtils.renameNucleicAcidToPDBStandard;
43  import static org.apache.commons.math3.util.FastMath.sqrt;
44  
45  import ffx.potential.bonded.AminoAcidUtils;
46  import ffx.potential.bonded.Atom;
47  import ffx.potential.bonded.Bond;
48  import ffx.potential.bonded.Molecule;
49  import ffx.potential.bonded.NucleicAcidUtils;
50  import ffx.potential.bonded.Polymer;
51  import ffx.potential.bonded.Residue;
52  import java.io.ByteArrayOutputStream;
53  import java.io.PrintStream;
54  import java.nio.charset.StandardCharsets;
55  import java.util.ArrayList;
56  import java.util.List;
57  import java.util.ListIterator;
58  import java.util.logging.Logger;
59  
60  /**
61   * The Utilities class provides methods to locate functional units of an organic system.
62   *
63   * @author Michael J. Schnieders
64   * @since 1.0
65   */
66  public final class Utilities {
67  
68    private static final Logger logger = Logger.getLogger(Utilities.class.getName());
69    /**
70     * The algorithms used to split arrays of atoms into a structural hierarchy use a bunch of List
71     * instances. Pooling them seems like it might be a performance win - although better algorithms
72     * probably exist. This is currently backed by ArrayLists.
73     */
74    private static final List<List<Atom>> atomListPool = new ArrayList<>();
75  
76    /**
77     * Finds the RMS deviation between the atoms of MolecularAssembly m1 and m2 provided they have the
78     * same number of atoms.
79     *
80     * @param m1 a {@link ffx.potential.MolecularAssembly} object.
81     * @param m2 a {@link ffx.potential.MolecularAssembly} object.
82     * @return a double.
83     */
84    public static double RMSCoordDev(MolecularAssembly m1, MolecularAssembly m2) {
85      if (m1 == null || m2 == null) {
86        return 0;
87      }
88      int n1 = m1.getAtomList().size();
89      int n2 = m2.getAtomList().size();
90      if (n1 != n2) {
91        return 0;
92      }
93      Atom a1, a2;
94      double[] d = new double[3];
95      double[] da = new double[3];
96      double[] db = new double[3];
97      double rms = 0;
98      ListIterator<Atom> li, lj;
99      for (li = m1.getAtomList().listIterator(), lj = m2.getAtomList().listIterator();
100         li.hasNext(); ) {
101       a1 = li.next();
102       a2 = lj.next();
103       a1.getXYZ(da);
104       a2.getXYZ(db);
105       sub(da, db, d);
106       rms += d[0] * d[0] + d[1] * d[1] + d[2] * d[2];
107     }
108     return sqrt(rms / n1);
109   }
110 
111   /**
112    * This routine sub-divides a system into groups of ions, water, hetero molecules, and
113    * polynucleotides/polypeptides.
114    *
115    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
116    * @param atoms a {@link java.util.List} object.
117    */
118   public static void biochemistry(MolecularAssembly molecularAssembly, List<Atom> atoms) {
119     
120     Atom atom, seed = null;
121     int num = 0;
122     int waterNum = 0;
123     int ionNum = 0;
124     int moleculeNum = 0;
125     List<String> segIDs = new ArrayList<>();
126     while (!atoms.isEmpty()) {
127       /*
128        Nitrogen is used to "seed" a backbone search because carbon can
129        be separated from the backbone by a sulfur (e.g., MET).
130       */
131       for (Atom a : atoms) {
132         seed = a;
133         if (seed.getAtomicNumber() == 7) {
134           break;
135         }
136       }
137       // If no nitrogen atoms remain, there are no nucleic or amino acids.
138       if (seed.getAtomicNumber() != 7) {
139         List<Atom> moleculeAtoms;
140         while (!atoms.isEmpty()) {
141           atom = atoms.get(0);
142           if (atom.getNumBonds() == 0) {
143             // A metal ion or noble gas
144             ionNum++;
145             Molecule ion = new Molecule(atom.getName() + "-" + ionNum);
146             ion.addMSNode(atom);
147             atoms.remove(0);
148             molecularAssembly.addMSNode(ion);
149             continue;
150           } else if (atom.getAtomicNumber() == 8 && isWaterOxygen(atom)) {
151             // Water
152             waterNum++;
153             Molecule water = new Molecule("Water-" + waterNum);
154             water.addMSNode(atom);
155             atoms.remove(0);
156             List<Bond> bonds = atom.getBonds();
157             for (Bond b : bonds) {
158               Atom hydrogen = b.get1_2(atom);
159               water.addMSNode(hydrogen);
160               atoms.remove(hydrogen);
161             }
162             molecularAssembly.addMSNode(water);
163             continue;
164           }
165           // All other molecules
166           moleculeNum++;
167           Molecule molecule = new Molecule("Molecule-" + moleculeNum);
168           moleculeAtoms = getAtomListFromPool();
169           collectAtoms(atoms.get(0), moleculeAtoms, true);
170           while (!moleculeAtoms.isEmpty()) {
171             atom = moleculeAtoms.get(0);
172             moleculeAtoms.remove(0);
173             molecule.addMSNode(atom);
174             atoms.remove(atom);
175           }
176           molecularAssembly.addMSNode(molecule);
177         }
178         break;
179       }
180 
181       List<Atom> backbone = findPolymer(seed, null);
182       if (backbone != null && !backbone.isEmpty()) {
183         seed = backbone.get(backbone.size() - 1);
184         backbone = findPolymer(seed, null);
185       }
186 
187 
188       Character chainID = getChainID(num);
189       String segID = getSegID(chainID, segIDs);
190       Polymer c = new Polymer(chainID, segID, true);
191 
192       if (backbone != null && backbone.size() > 2 && divideBackbone(backbone, c)) {
193         for (Atom a : c.getAtomList()) {
194           atoms.remove(a);
195         }
196         molecularAssembly.addMSNode(c);
197         num++;
198       } else {
199         // The divideBackone method may have set the parent of some atoms before failing. Clear them.
200         for (Atom a : atoms) {
201           a.setParent(null);
202         }
203         moleculeNum++;
204         Molecule hetero = new Molecule("Molecule-" + moleculeNum);
205         atom = backbone.get(0);
206         List<Atom> heteroAtomList = getAtomListFromPool();
207         collectAtoms(atom, heteroAtomList,  true);
208         for (Atom a : heteroAtomList) {
209           hetero.addMSNode(a);
210         }
211         for (Atom a : hetero.getAtomList()) {
212           atoms.remove(a);
213         }
214         molecularAssembly.addMSNode(hetero);
215       }
216     }
217   }
218 
219   /**
220    * Returns an atom bonded to the "end" atom, which is not equal to "other".
221    *
222    * @param end Atom
223    * @param other Atom
224    * @return Atom
225    */
226   public static Atom findSeed(Atom end, Atom other) {
227     for (Bond b : end.getBonds()) {
228       Atom seed = b.get1_2(end);
229       if (seed != other) {
230         return seed;
231       }
232     }
233     return null;
234   }
235 
236   /**
237    * Determine chainID for a given polymer number.
238    *
239    * @param i int
240    * @return Character
241    */
242   public static Character getChainID(int i) {
243     if (i > 35) {
244       i = i % 36;
245     }
246     char c;
247     if (i < 26) {
248       // 65 is 'A'. 90 is 'Z'.
249       c = (char) (i + 65);
250     } else {
251       i -= 26;
252       // 48 is '0'. 57 is '9'.
253       c = (char) (i + 48);
254     }
255     return c;
256   }
257 
258   /**
259    * Gets the stack trace for an exception and converts it to a String.
260    *
261    * @param ex An exception.
262    * @return A String of its stack trace.
263    */
264   public static String stackTraceToString(Throwable ex) {
265     ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
266     try (PrintStream ps = new PrintStream(byteArrayOutputStream, true, StandardCharsets.UTF_8)) {
267       ex.printStackTrace(ps);
268       return byteArrayOutputStream.toString(StandardCharsets.UTF_8);
269     } catch (Exception e) {
270       logger.warning("Unable to convert stack trace to String");
271       return "";
272     }
273   }
274 
275   /**
276    * addAtomListToPool
277    *
278    * @param a a {@link java.util.List} object.
279    */
280   private static void addAtomListToPool(List<Atom> a) {
281     a.clear();
282     atomListPool.add(a);
283   }
284 
285   /**
286    * Collect all the atoms in a polymer opposite the end atom, and put them into the residue. This
287    * assumes that the "cap" is only linked to the rest of the polymer through the end atom.
288    *
289    * @param end Atom
290    * @param seed Atom
291    * @param residue Residue
292    */
293   private static void addCap(Atom end, Atom seed, Residue residue) {
294     List<Atom> cap = getAtomListFromPool();
295     cap.add(end);
296     collectAtoms(seed, cap);
297     // Assume the end & seed atoms are already part of the residue
298     cap.remove(0);
299     cap.remove(0);
300     for (Atom a : cap) {
301       residue.addMSNode(a);
302     }
303   }
304 
305   /**
306    * Add a phosphate and its bonded oxygens that are not bonded to a carbon to the specified
307    * residue.
308    *
309    * @param phosphate Atom
310    * @param residue Residue
311    */
312   private static void addPhosphate(Atom phosphate, Residue residue) {
313     if (phosphate == null) {
314       return;
315     }
316     residue.addMSNode(phosphate);
317     for (Bond b : phosphate.getBonds()) {
318       Atom oxygen = b.get1_2(phosphate);
319       // Add oxygens not bonded to a Carbon
320       if (numberOfBondsWith(oxygen, 6) == 0) {
321         residue.addMSNode(oxygen);
322         // Add hydrogens atoms for protonated oxygen groups
323         Atom hydrogen = findBondWith(oxygen, 1);
324         if (hydrogen != null) {
325           residue.addMSNode(hydrogen);
326         }
327       }
328     }
329   }
330 
331   private static Residue assignResidue(
332       List<Atom> backbone, int start, List<Atom> atoms, List<Atom> sidePolymer) {
333     int atomicnum;
334     char[] chars = {'S', 'P', 'O', 'N', 'C'};
335     // Bin number to atomic number => 0 = S, 1 = P, 2 = O, 3 = N, 4 = C
336     int[] bins = new int[5];
337 
338     for (Atom a : sidePolymer) {
339       atomicnum = a.getAtomicNumber();
340       switch (atomicnum) {
341         case 1:
342           // ignore hydrogens
343           break;
344         case 6:
345           // Carbon
346           bins[4]++;
347           break;
348         case 7:
349           // Nitrogen
350           bins[3]++;
351           break;
352         case 8:
353           // Oxygen
354           bins[2]++;
355           break;
356         case 15:
357           // Phosphorus
358           bins[1]++;
359           break;
360         case 16:
361           // Sulfur
362           bins[0]++;
363           break;
364         default:
365           return null;
366       }
367     }
368     StringBuilder key = new StringBuilder();
369     int atomCount = 0;
370     for (int i = 0; i < 5; i++) {
371       if (bins[i] != 0) {
372         atomCount += bins[i];
373         key.append(chars[i]);
374         key.append(bins[i]);
375       }
376     }
377     if (atomCount == 0) {
378       key.append("H"); // Glycine
379     }
380 
381     String resname = AminoAcidUtils.sidechainStoichiometry.get(key.toString());
382 
383     if (resname == null) {
384       resname = "Unknown";
385     } else {
386       resname = resname.intern();
387     }
388 
389     if (resname.equals("1") || resname.equals("2")) {
390       // Special case where atom string keys aren't unique
391       Atom alpha = backbone.get(start + 1);
392       Atom carbonyl = backbone.get(start + 2);
393       Atom beta = null;
394       List<Bond> alphabonds = alpha.getBonds();
395       for (Bond bond : alphabonds) {
396         beta = bond.get1_2(alpha);
397         // Don't want the peptide nitrogen or alpha hydrogen or carbonyl carbon
398         if (beta.getAtomicNumber() != 7 && beta.getAtomicNumber() != 1 && beta != carbonyl) {
399           break;
400         }
401         beta = null;
402       }
403       if (beta == null) {
404         return null;
405       }
406       List<Bond> betabonds = beta.getBonds();
407       Atom gamma;
408       int carboncount = 0;
409       for (Bond bond : betabonds) {
410         gamma = bond.get1_2(beta);
411         if (gamma.getAtomicNumber() == 6) {
412           carboncount++;
413         }
414       }
415       if (resname.equals("1")) {
416         if (carboncount == 2) {
417           resname = "PRO";
418         } else {
419           resname = "VAL";
420         }
421       } else {
422         if (carboncount == 2) {
423           resname = "LEU";
424         } else {
425           resname = "ILE";
426         }
427       }
428     } else if (resname.equals("3")) {
429       Atom c3 = backbone.get(start + 3);
430       int num = countCO(c3);
431       if (num == 2) {
432         resname = "A";
433       } else {
434         resname = "DG";
435       }
436     }
437     Residue residue = null;
438     try {
439       NucleicAcidUtils.NucleicAcid3.valueOf(resname.toUpperCase());
440       residue = new Residue(resname, Residue.ResidueType.NA);
441     } catch (Exception e) {
442       //
443     }
444     if (residue == null) {
445       try {
446         AminoAcidUtils.AminoAcid3.valueOf(resname.toUpperCase());
447         residue = new Residue(resname, Residue.ResidueType.AA);
448       } catch (Exception e) {
449         //
450       }
451     }
452     if (residue == null) {
453       residue = new Residue(resname, Residue.ResidueType.UNK);
454     }
455 
456     // Create the Residue group
457     for (Atom a : atoms) {
458       residue.addMSNode(a);
459       // logger.info(a.toString());
460     }
461     return residue;
462   }
463 
464   /**
465    * Given an array of bonded atoms, this function recursively collects all other connected atoms,
466    * without backtracking over atoms already in the list. Disulfide bonds are not crossed. (the
467    * intent is to search along a peptide backbone) Atoms preloaded into the List provide search
468    * termination.
469    *
470    * @param seed Atom
471    * @param atoms List
472    */
473   private static void collectAtoms(Atom seed, List<Atom> atoms) {
474     collectAtoms(seed, atoms, false);
475   }
476 
477   /**
478    * Given an array of bonded atoms, this function recursively collects all other connected atoms,
479    * without backtracking over atoms already in the list. Disulfide bonds can be crossed. (the
480    * intent is to not cross disulfide bonds in a protein, but cross disulfide bonds within molecules)
481    * Atoms preloaded into the List provide search termination.
482    *
483    * @param seed Atom
484    * @param atoms List
485    * @param searchDisulfide Allows crossing of disulfide bonds (e.g. molecules)
486    */
487   private static void collectAtoms(Atom seed, List<Atom> atoms, boolean searchDisulfide) {
488     if (seed == null) {
489       return;
490     }
491     atoms.add(seed);
492     for (Bond b : seed.getBonds()) {
493       Atom nextAtom = b.get1_2(seed);
494       if (nextAtom.getParent() != null) {
495         continue;
496       }
497       // If true, search across disulfide bonds.
498       if ((searchDisulfide || (nextAtom.getAtomicNumber() != 16 || seed.getAtomicNumber() != 16))
499           && !atoms.contains(nextAtom)) {
500         collectAtoms(nextAtom, atoms, searchDisulfide);
501       }
502     }
503   }
504 
505   /**
506    * countCO
507    *
508    * @param adjacent a {@link ffx.potential.bonded.Atom} object.
509    * @return a int.
510    */
511   private static int countCO(Atom adjacent) {
512     int total = 0;
513     for (Bond b : adjacent.getBonds()) {
514       Atom carbonyl = b.get1_2(adjacent);
515       if (carbonyl.getAtomicNumber() == 6) {
516         for (Bond b2 : carbonyl.getBonds()) {
517           Atom oxygen = b2.get1_2(carbonyl);
518           if (oxygen.getAtomicNumber() == 8) {
519             total++;
520           }
521         }
522       }
523     }
524     return total;
525   }
526 
527   /**
528    * divideBackbone
529    *
530    * @param backbone a {@link java.util.List} object.
531    * @param c a {@link ffx.potential.bonded.Polymer} object.
532    * @return a boolean.
533    */
534   private static boolean divideBackbone(List<Atom> backbone, Polymer c) {
535     int length = backbone.size();
536 
537     // Try to find a Phosphorus or Nitrogen in the backbone
538     int nitrogenCount = 0;
539     int phosphorusCount = 0;
540     for (Atom match : backbone) {
541       int atomicNumber = match.getAtomicNumber();
542       if (atomicNumber == 15) {
543         phosphorusCount++;
544       } else if (atomicNumber == 7) {
545         nitrogenCount++;
546       }
547     }
548 
549     // logger.info(format(" Divide Backbone: %d Nitrogen", nitrogenCount));
550     // logger.info(format(" Divide Backbone: %d Phosphorous", phosphorusCount));
551 
552     PolymerType type;
553     if (phosphorusCount >= nitrogenCount && phosphorusCount > 1) {
554       // logger.info(" Divide Backbone: Nucleic Acid");
555       type = PolymerType.NUCLEICACID;
556     } else if (nitrogenCount > phosphorusCount && nitrogenCount > 2) {
557       // logger.info(" Divide Backbone: Amino Acid");
558       type = PolymerType.AMINOACID;
559     } else {
560       // logger.info(" Divide Backbone: Unknown");
561       return false;
562     }
563     int start = -1;
564     for (int i = 0; i < length; i++) {
565       Residue res = patternMatch(i, backbone, type);
566       if (res != null) {
567         for (Atom a : res.getAtomList()) {
568           a.setParent(null);
569         }
570         if (!(res.getName().equals("Unknown"))) {
571           start = i;
572           // Want 5' to 3'
573           if (type == PolymerType.NUCLEICACID) {
574             Atom carbon5 = backbone.get(start + 1);
575             if (numberOfBondsWith(carbon5, 6) != 1) {
576               start = -1;
577             }
578           }
579           break;
580         }
581       }
582     }
583     if (start == -1) {
584       backbone = reverseAtomList(backbone);
585       for (int i = 0; i < length; i++) {
586         Residue res = patternMatch(i, backbone, type);
587         if (res != null) {
588           for (Atom a : res.getAtomList()) {
589             a.setParent(null);
590           }
591           if (!(res.getName().equals("Unknown"))) {
592             start = i;
593             break;
594           }
595         }
596       }
597     }
598     if (start == -1) {
599       return false;
600     }
601     // Potential Polypeptide
602     if (type == PolymerType.AMINOACID) {
603       Atom nitrogen, alpha, carbonyl = null;
604       Atom nitrogen2, carbonyl2;
605       Residue aa = null;
606       int lastRes = 0;
607       int firstRes = -1;
608       List<Residue> aaArray = new ArrayList<>();
609       while (start < length) {
610         aa = patternMatch(start, backbone, PolymerType.AMINOACID);
611         if (aa != null) {
612           if (firstRes == -1) {
613             firstRes = start;
614             carbonyl = findCarbonyl(backbone.get(start));
615           }
616           aaArray.add(aa);
617           lastRes = start;
618         }
619         start += 3;
620       }
621       // Make sure the first residue is found
622       aa = null;
623       if (carbonyl != null) {
624         alpha = findAlphaCarbon(carbonyl);
625         if (alpha != null) {
626           nitrogen = findBondWith(alpha, 7);
627           if (nitrogen != null) {
628             nitrogen2 = findBondWith(carbonyl, 7);
629             List<Atom> firstAtoms = getAtomListFromPool();
630             firstAtoms.add(nitrogen);
631             firstAtoms.add(alpha);
632             firstAtoms.add(carbonyl);
633             firstAtoms.add(nitrogen2);
634             aa = patternMatch(0, firstAtoms, PolymerType.AMINOACID);
635             addAtomListToPool(firstAtoms);
636             if (aa != null) {
637               addCap(alpha, nitrogen, aa);
638               aaArray.add(0, aa);
639             }
640           }
641         }
642       }
643       // Add the remaining atoms to the end of the Polymer
644       if (aa == null) {
645         nitrogen = backbone.get(firstRes);
646         alpha = backbone.get(firstRes + 1);
647         addCap(alpha, nitrogen, aaArray.get(0));
648       }
649       // Make sure the last residue is found
650       aa = null;
651       carbonyl = findCarbonyl(backbone.get(lastRes + 1));
652       if (carbonyl != null) {
653         nitrogen = findBondWith(carbonyl, 7);
654         if (nitrogen != null) {
655           alpha = findAlphaCarbon(nitrogen);
656           if (alpha != null) {
657             carbonyl2 = findCarbonyl(alpha);
658             if (carbonyl2 != null) {
659               List<Atom> lastAtoms = getAtomListFromPool();
660               lastAtoms.add(carbonyl);
661               lastAtoms.add(nitrogen);
662               lastAtoms.add(alpha);
663               lastAtoms.add(carbonyl2);
664               aa = patternMatch(1, lastAtoms, PolymerType.AMINOACID);
665               addAtomListToPool(lastAtoms);
666               if (aa != null) {
667                 addCap(alpha, carbonyl2, aa);
668                 aaArray.add(aa);
669               }
670             }
671           }
672         }
673       }
674       if (aa == null) {
675         carbonyl = backbone.get(lastRes + 2);
676         alpha = backbone.get(lastRes + 1);
677         addCap(alpha, carbonyl, aaArray.get(aaArray.size() - 1));
678       }
679       int index = 1;
680       for (Residue r : aaArray) {
681         r.setNumber(index++);
682         if(!renameAminoAcidToPDBStandard(r)){
683           return false;
684         }
685         c.addMSNode(r);
686       }
687       // Potential DNA/RNA
688     } else if (type == PolymerType.NUCLEICACID) {
689       Residue base;
690       int lastRes = 0;
691       boolean firstBase = true;
692       Atom phos = null;
693       Atom oxygen1 = null;
694       Atom phosphate1 = null;
695       List<Residue> na = new ArrayList<>();
696       while (start < length) {
697         base = patternMatch(start, backbone, PolymerType.NUCLEICACID);
698         if (base != null) {
699           phos = backbone.get(start - 1);
700           if (phos != null && phos.getAtomicNumber() == 15) {
701             addPhosphate(phos, base);
702           }
703           na.add(base);
704           if (firstBase) {
705             firstBase = false;
706             phosphate1 = backbone.get(start - 1);
707             oxygen1 = backbone.get(start);
708           }
709           lastRes = start;
710         }
711         start += 6;
712       }
713       // Make sure the first base is found
714       Atom o2, o3;
715       Atom c1, c2, c3;
716       if (phosphate1 != null && oxygen1 != null) {
717         o2 = findOtherOxygen(phosphate1, oxygen1);
718         if (o2 != null) {
719           c1 = findBondWith(o2, 6);
720           if (c1 != null) {
721             c2 = findCO(c1);
722             if (c2 != null) {
723               c3 = findC5(c2);
724               if (c3 != null) {
725                 o3 = findBondWith(c3, 8);
726                 if (o3 != null) {
727                   List<Atom> firstAtoms = getAtomListFromPool();
728                   firstAtoms.add(o3);
729                   firstAtoms.add(c3);
730                   firstAtoms.add(c2);
731                   firstAtoms.add(c1);
732                   firstAtoms.add(o2);
733                   firstAtoms.add(phosphate1);
734                   base = patternMatch(0, firstAtoms, type);
735                   if (base != null) {
736                     addCap(c3, o3, base);
737                     na.add(0, base);
738                   }
739                 }
740               }
741             }
742           }
743         }
744       }
745       // Make sure the last base is found
746       oxygen1 = backbone.get(lastRes + 4);
747       phosphate1 = backbone.get(lastRes + 5);
748       if (phosphate1 != null && oxygen1 != null) {
749         o2 = findOtherOxygen(phosphate1, oxygen1);
750         if (o2 != null) {
751           c1 = findBondWith(o2, 6);
752           if (c1 != null) {
753             c2 = findBondWith(c1, 6);
754             if (c2 != null) {
755               c3 = findCCO(c2);
756               if (c3 != null) {
757                 o3 = findBondWith(c3, 8);
758                 if (o3 != null) {
759                   List<Atom> lastAtoms = getAtomListFromPool();
760                   lastAtoms.add(phosphate1);
761                   lastAtoms.add(o2);
762                   lastAtoms.add(c1);
763                   lastAtoms.add(c2);
764                   lastAtoms.add(c3);
765                   lastAtoms.add(o3);
766                   base = patternMatch(1, lastAtoms, type);
767                   if (base != null) {
768                     addPhosphate(phosphate1, base);
769                     addCap(c3, o3, base);
770                     na.add(base);
771                   }
772                 }
773               }
774             }
775           }
776         }
777       }
778       int index = 1;
779       for (Residue r : na) {
780         r.setNumber(index++);
781         renameNucleicAcidToPDBStandard(r);
782         c.addMSNode(r);
783       }
784     } else {
785       return false;
786     }
787     return true;
788   }
789 
790   /**
791    * Returns a carbon that is bonded to the atom a, a carbonyl group, and a nitrogen. O=C-alpha-N
792    *
793    * @param a Atom
794    * @return Atom
795    */
796   private static Atom findAlphaCarbon(Atom a) {
797     for (Bond b : a.getBonds()) {
798       Atom alpha = b.get1_2(a);
799       if (alpha.getAtomicNumber() == 6 && findCO(alpha) != null && formsBondsWith(alpha, 7)) {
800         return alpha;
801       }
802     }
803     return null;
804   }
805 
806   /**
807    * Returns the first atom with the specified atomic number that bonds with atom a, or null
808    * otherwise.
809    *
810    * @param a Atom
811    * @param atomicNumber int
812    * @return Atom
813    */
814   private static Atom findBondWith(Atom a, int atomicNumber) {
815     for (Bond b : a.getBonds()) {
816       Atom other = b.get1_2(a);
817       if (other.getAtomicNumber() == atomicNumber) {
818         return other;
819       }
820     }
821     return null;
822   }
823 
824   /**
825    * Returns a carbon that is bonded to the adjacent atom, which bonds 1 carbon and 1 oxygen.
826    *
827    * @param adjacent Atom
828    * @return Atom
829    */
830   private static Atom findC5(Atom adjacent) {
831     for (Bond b : adjacent.getBonds()) {
832       Atom carbon = b.get1_2(adjacent);
833       if (carbon.getAtomicNumber() == 6
834           && numberOfBondsWith(carbon, 6) == 1
835           && numberOfBondsWith(carbon, 8) == 1) {
836         return carbon;
837       }
838     }
839     return null;
840   }
841 
842   /**
843    * Returns a carbon that is bonded to the adjacent atom, which bonds 2 carbons and 1 oxygen.
844    *
845    * @param adjacent Atom
846    * @return Atom
847    */
848   private static Atom findCCO(Atom adjacent) {
849     for (Bond b : adjacent.getBonds()) {
850       Atom carbon = b.get1_2(adjacent);
851       if (carbon.getAtomicNumber() == 6
852           && numberOfBondsWith(carbon, 6) == 2
853           && numberOfBondsWith(carbon, 8) >= 1) {
854         return carbon;
855       }
856     }
857     return null;
858   }
859 
860   /**
861    * Returns a carbon that is bonded to the adjacent atom, which bonds at least 1 oxygen.
862    *
863    * @param adjacent Atom
864    * @return Atom
865    */
866   private static Atom findCO(Atom adjacent) {
867     for (Bond b : adjacent.getBonds()) {
868       Atom carbon = b.get1_2(adjacent);
869       if (carbon.getAtomicNumber() == 6 && formsBondsWith(carbon, 8)) {
870         return carbon;
871       }
872     }
873     return null;
874   }
875 
876   /**
877    * Returns a carbon that is bonded to the adjacent atom and an oxygen.
878    *
879    * @param adjacent Atom
880    * @return Atom
881    */
882   private static Atom findCarbonyl(Atom adjacent) {
883     for (Bond b : adjacent.getBonds()) {
884       Atom carbonyl = b.get1_2(adjacent);
885       if (carbonyl.getAtomicNumber() == 6) {
886         for (Bond b2 : carbonyl.getBonds()) {
887           Atom oxygen = b2.get1_2(carbonyl);
888           if (oxygen.getAtomicNumber() == 8 && oxygen.getBonds().size() == 1) {
889             return carbonyl;
890           }
891         }
892       }
893     }
894     return null;
895   }
896 
897   /**
898    * Returns an oxygen atom that is bonded to atom p and a carbon, but is not atom o. O-P-X-C where
899    * X is the returned atom. This is useful for traversing a nucleic acid backbone.
900    *
901    * @param p Atom
902    * @param o Atom
903    * @return Atom
904    */
905   private static Atom findOtherOxygen(Atom p, Atom o) {
906     for (Bond b : p.getBonds()) {
907       Atom oxygen = b.get1_2(p);
908       if (oxygen.getAtomicNumber() == 8 && oxygen != o && formsBondsWith(oxygen, 6)) {
909         return oxygen;
910       }
911     }
912     return null;
913   }
914 
915   /**
916    * findPolymer
917    *
918    * @param currentAtom Atom
919    * @param path List
920    * @return List
921    */
922   private static List<Atom> findPolymer(Atom currentAtom, List<Atom> path) {
923 
924     // Atom has no bonds to follow
925     if (currentAtom.getBonds() == null) {
926       path = getAtomListFromPool();
927       path.add(currentAtom);
928       return path;
929     }
930 
931     // End of Recursion condition
932     if (currentAtom.getParent() != null) {
933       return null;
934     }
935 
936     // Only C,N,O,P in a DNA/RNA/protein backbone
937     int anum = currentAtom.getAtomicNumber();
938     if (anum != 6 && anum != 7 && anum != 8 && anum != 15) {
939       return null;
940     }
941 
942     // Allow the search to make it out of side chains, but not enter them.
943     if (path != null && path.size() > 6) {
944 
945       // Oxygen is only in the backbone for Nucleic Acids in a phosphate group
946       if (anum == 8) {
947         if (!formsBondsWith(currentAtom, 15)) {
948           // Found an oxygen not bonded to a phosphate.
949           return null;
950         }
951         // Nitrogen is only in the backbone in peptide bonds or at an N-terminus.
952       } else if (anum == 7) {
953         Atom alphaCarbon = findAlphaCarbon(currentAtom);
954         if (alphaCarbon == null) {
955           return null;
956         }
957         // Avoid more than 3 carbons in a row (phenyl groups, etc.)
958       } else if (anum == 6) {
959         Atom a;
960         int anum2, anum3;
961         int size = path.size();
962         a = path.get(size - 1);
963         anum2 = a.getAtomicNumber();
964         if (anum2 == 6) {
965           a = path.get(size - 2);
966           anum3 = a.getAtomicNumber();
967           if (anum3 == 6) {
968             return null;
969           }
970         }
971       }
972     }
973 
974     Atom previousAtom = null;
975     if (path != null) {
976       previousAtom = path.get(path.size() - 1);
977     }
978 
979     // Atoms with only one bond are at the end of a Polymer
980     List<Bond> bonds = currentAtom.getBonds();
981     if (bonds.size() == 1 && previousAtom != null) {
982       return null;
983     }
984 
985     // Initialization
986     if (path == null) {
987       path = getAtomListFromPool();
988       previousAtom = null;
989       // Or Continuation
990     } else {
991       List<Atom> pathclone = getAtomListFromPool();
992       pathclone.addAll(path);
993       path = pathclone;
994     }
995 
996     // Add the currentAtom to the growing path
997     path.add(currentAtom);
998 
999     // Continue search in each bond direction, but no backtracking over previousAtom
1000     List<Atom> newPolymer, maxPolymer = getAtomListFromPool();
1001     for (Bond b : bonds) {
1002       Atom nextAtom = b.get1_2(currentAtom);
1003 
1004       // Check to avoid returning in the same direction and loops
1005       if (nextAtom != previousAtom && !path.contains(nextAtom)) {
1006         newPolymer = findPolymer(nextAtom, path);
1007         if (newPolymer != null) {
1008           // logger.info(" Next atom: " + nextAtom + " Size: " + newPolymer.size());
1009           // Check to see if the Polymers contain any of the same atoms,
1010           // and if so, use the shorter Polymer (avoids loops)
1011           // if (haveCommonAtom(newPolymer, maxPolymer)) {
1012           //    if (newPolymer.size() < maxPolymer.size()) {
1013           //        addAtomListToPool(maxPolymer);
1014           //        maxPolymer = newPolymer;
1015           //    }
1016           // } else if (newPolymer.size() > maxPolymer.size()) {
1017           if (newPolymer.size() > maxPolymer.size()) {
1018             addAtomListToPool(maxPolymer);
1019             maxPolymer = newPolymer;
1020           }
1021         }
1022       }
1023     }
1024 
1025     // Add the currentAtom to the longest discovered chain and return
1026     maxPolymer.add(0, currentAtom);
1027     return maxPolymer;
1028   }
1029 
1030   /**
1031    * True if Atom a forms a bond with another atom of the specified atomic Number.
1032    *
1033    * @param a Atom
1034    * @param atomicNumber int
1035    * @return boolean
1036    */
1037   private static boolean formsBondsWith(Atom a, int atomicNumber) {
1038     for (Bond b : a.getBonds()) {
1039       Atom other = b.get1_2(a);
1040       if (other.getAtomicNumber() == atomicNumber) {
1041         return true;
1042       }
1043     }
1044     return false;
1045   }
1046 
1047   /**
1048    * getAtomListFromPool
1049    *
1050    * @return a {@link java.util.List} object.
1051    */
1052   private static List<Atom> getAtomListFromPool() {
1053     if (atomListPool.isEmpty()) {
1054       return new ArrayList<>();
1055     }
1056     return atomListPool.remove(0);
1057   }
1058 
1059   /**
1060    * Convert possibly duplicate chainID into a unique segID.
1061    *
1062    * @param c chain ID just read.
1063    * @return a unique segID.
1064    */
1065   private static String getSegID(Character c, List<String> segIDs) {
1066     if (c == null || c.equals(' ')) {
1067       c = 'A';
1068     }
1069 
1070     // Loop through existing segIDs to find the first one that is unused.
1071     int m = 0;
1072     for (String segID : segIDs) {
1073       if (segID.endsWith(c.toString())) {
1074         m++;
1075       }
1076     }
1077 
1078     // If the count is greater than 0, then append it.
1079     String newSegID;
1080     if (m == 0) {
1081       newSegID = c.toString();
1082     } else {
1083       newSegID = c.toString() + m;
1084     }
1085 
1086     segIDs.add(newSegID);
1087     return newSegID;
1088   }
1089 
1090   /**
1091    * Returns true if the lists contain any atom in common.
1092    *
1093    * @param list1 List
1094    * @param list2 List
1095    * @return boolean
1096    */
1097   private static boolean haveCommonAtom(List<Atom> list1, List<Atom> list2) {
1098     if (list1 == null || list2 == null) {
1099       return false;
1100     }
1101     for (Atom a : list1) {
1102       if (list2.contains(a)) {
1103         return true;
1104       }
1105     }
1106     return false;
1107   }
1108 
1109   /**
1110    * Returns true if Atom a is water oxygen.
1111    *
1112    * @param a Atom
1113    * @return boolean
1114    */
1115   static boolean isWaterOxygen(Atom a) {
1116     if (a.getAtomicNumber() != 8) {
1117       return false;
1118     }
1119     for (Bond b : a.getBonds()) {
1120       Atom h = b.get1_2(a);
1121       if (h.getAtomicNumber() != 1) {
1122         return false;
1123       }
1124     }
1125     return true;
1126   }
1127 
1128   /**
1129    * This function returns the number of times atom "a" is bonded to an atom of the specified atomic
1130    * number.
1131    *
1132    * @param a Atom
1133    * @param atomicNumber int
1134    * @return int
1135    */
1136   private static int numberOfBondsWith(Atom a, int atomicNumber) {
1137     int total = 0;
1138     for (Bond b : a.getBonds()) {
1139       Atom other = b.get1_2(a);
1140       if (other.getAtomicNumber() == atomicNumber) {
1141         total++;
1142       }
1143     }
1144     return total;
1145   }
1146 
1147   /**
1148    * Check to see if a portion of the backbone matches that of a biological polymer, and if so
1149    * determine the respective residue
1150    *
1151    * @param start First atom.
1152    * @param backbone List of backbone atoms.
1153    * @param type Type of residue.
1154    * @return The residue.
1155    */
1156   private static Residue patternMatch(int start, List<Atom> backbone, PolymerType type) {
1157     int[] pattern;
1158 
1159     // Initialization
1160     if (type == PolymerType.AMINOACID) {
1161       pattern = AminoAcidUtils.AAPATTERN;
1162       if (backbone.size() < start + pattern.length) {
1163         return null;
1164       }
1165       // Check for correct Carbonyl placement
1166       Atom a = backbone.get(start + 1);
1167       if (formsBondsWith(a, 8)) {
1168         return null;
1169       }
1170       a = backbone.get(start + 2);
1171       if (!formsBondsWith(a, 8)) {
1172         return null;
1173       }
1174     } else if (type == PolymerType.NUCLEICACID) {
1175       pattern = NucleicAcidUtils.NAPATTERN;
1176       if (backbone.size() < start + pattern.length) {
1177         return null;
1178       }
1179     } else {
1180       return null;
1181     }
1182 
1183     int length = pattern.length;
1184     List<Atom> atoms = getAtomListFromPool();
1185     List<Atom> sidePolymer = getAtomListFromPool();
1186     for (int i = 0; i < length; i++) {
1187       Atom a = backbone.get(start + i);
1188       // Add backbone atoms to terminate sidePolymer search
1189       sidePolymer.add(a);
1190       if (a.getAtomicNumber() != pattern[i]) {
1191         return null;
1192       }
1193     }
1194     // Collect all the atoms in the Residue
1195     // Add the atom before and after the backbone pattern to
1196     // terminate the search, then remove them
1197     if (start > 0) {
1198       atoms.add(backbone.get(start - 1));
1199     }
1200     if (start + length < backbone.size()) {
1201       atoms.add(backbone.get(start + length));
1202     }
1203     collectAtoms(backbone.get(start), atoms);
1204     if (start > 0) {
1205       atoms.remove(0);
1206     }
1207     if (start + length < backbone.size()) {
1208       atoms.remove(0);
1209       // Collect Just Side chain atoms, then remove backbone termination
1210     }
1211 
1212     if (type == PolymerType.AMINOACID) {
1213       collectAtoms(sidePolymer.get(1), sidePolymer);
1214     } else {
1215       Atom seed = null;
1216       for (Atom a : atoms) {
1217         if (a.getAtomicNumber() == 7) {
1218           seed = a;
1219           break;
1220         }
1221       }
1222       if (seed != null && seed.getAtomicNumber() == 7) {
1223         sidePolymer.add(seed);
1224         collectAtoms(seed, sidePolymer);
1225       } else {
1226         return null;
1227       }
1228     }
1229 
1230     sidePolymer.subList(0, length + 1).clear();
1231 
1232     return assignResidue(backbone, start, atoms, sidePolymer);
1233   }
1234 
1235   /**
1236    * Returns an List with reversed ordering.
1237    *
1238    * @param atomList List
1239    * @return List
1240    */
1241   private static List<Atom> reverseAtomList(List<Atom> atomList) {
1242     List<Atom> reversedList = getAtomListFromPool();
1243     for (Atom a : atomList) {
1244       reversedList.add(0, a);
1245     }
1246     return reversedList;
1247   }
1248 
1249   /** An enumeration of recognized file types. */
1250   public enum FileType {
1251     XYZ,
1252     XPH,
1253     INT,
1254     ARC,
1255     PDB,
1256     CIF,
1257     ANY,
1258     SIM,
1259     UNK
1260   }
1261 
1262   /** An enumeration of recognized organic polymers. */
1263   public enum PolymerType {
1264     AMINOACID,
1265     NUCLEICACID,
1266     UNKNOWN
1267   }
1268 }