View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.parsers;
39  
40  import static ffx.potential.bonded.BondedUtils.numberAtoms;
41  import static ffx.potential.bonded.NamingUtils.renameAtomsToPDBStandard;
42  import static ffx.potential.bonded.PolymerUtils.assignAtomTypes;
43  import static ffx.potential.bonded.PolymerUtils.buildDisulfideBonds;
44  import static ffx.potential.bonded.PolymerUtils.buildMissingResidues;
45  import static ffx.potential.bonded.PolymerUtils.locateDisulfideBonds;
46  import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_2;
47  import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_3;
48  import static ffx.utilities.StringUtils.padLeft;
49  import static java.lang.Double.parseDouble;
50  import static java.lang.Integer.parseInt;
51  import static java.lang.String.format;
52  import static org.apache.commons.lang3.StringUtils.repeat;
53  import static org.apache.commons.math3.util.FastMath.min;
54  
55  import ffx.crystal.Crystal;
56  import ffx.crystal.SpaceGroup;
57  import ffx.crystal.SpaceGroupDefinitions;
58  import ffx.crystal.SpaceGroupInfo;
59  import ffx.crystal.SymOp;
60  import ffx.potential.MolecularAssembly;
61  import ffx.potential.Utilities.FileType;
62  import ffx.potential.bonded.AminoAcidUtils;
63  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
64  import ffx.potential.bonded.Atom;
65  import ffx.potential.bonded.Bond;
66  import ffx.potential.bonded.MSNode;
67  import ffx.potential.bonded.Molecule;
68  import ffx.potential.bonded.Polymer;
69  import ffx.potential.bonded.Residue;
70  import ffx.potential.parameters.ForceField;
71  import ffx.utilities.Hybrid36;
72  import ffx.utilities.StringUtils;
73  import java.io.BufferedReader;
74  import java.io.BufferedWriter;
75  import java.io.File;
76  import java.io.FileNotFoundException;
77  import java.io.FileReader;
78  import java.io.FileWriter;
79  import java.io.IOException;
80  import java.util.ArrayList;
81  import java.util.Collections;
82  import java.util.HashMap;
83  import java.util.List;
84  import java.util.Map;
85  import java.util.Objects;
86  import java.util.OptionalDouble;
87  import java.util.Set;
88  import java.util.logging.Level;
89  import java.util.logging.Logger;
90  import java.util.regex.Matcher;
91  import java.util.regex.Pattern;
92  import java.util.stream.Collectors;
93  import org.apache.commons.configuration2.CompositeConfiguration;
94  
95  /**
96   * The PDBFilter class parses data from a Protein DataBank (*.PDB) file. The following records are
97   * recognized: ANISOU, ATOM, CONECT, CRYST1, END, HELIX, HETATM, LINK, SHEET, SSBOND, REMARK. The
98   * rest are currently ignored.
99   *
100  * @author Michael J. Schnieders
101  * @see <a href="http://www.wwpdb.org/documentation/format32/v3.2.html">PDB format 3.2</a>
102  * @since 1.0
103  */
104 public final class PDBFilter extends SystemFilter {
105 
106   private static final Logger logger = Logger.getLogger(PDBFilter.class.getName());
107   private static final Set<String> backboneNames;
108   private static final Set<String> constantPhBackboneNames;
109 
110   static {
111     String[] names = {"C", "CA", "N", "O", "OXT", "OT2"};
112     backboneNames = Set.of(names);
113 
114     String[] constantPhNames = {"C", "CA", "N", "O", "OXT", "OT2", "H", "HA", "H1", "H2", "H3"};
115     constantPhBackboneNames = Set.of(constantPhNames);
116   }
117 
118   /** Map of SEQRES entries. */
119   private final Map<Character, String[]> seqRes = new HashMap<>();
120   /** Map of DBREF entries. */
121   private final Map<Character, int[]> dbRef = new HashMap<>();
122   /** List of altLoc characters seen in the PDB file. */
123   private final List<Character> altLocs = new ArrayList<>();
124   /**
125    * List of segIDs defined for the PDB file.
126    *
127    * <p>The expectation is for chain naming from A-Z, then from 0-9. For large systems, chain names
128    * are sometimes reused due to limitations in the PDB format.
129    *
130    * <p>However, we define segIDs to always be unique. For the first A-Z,0-9 series chainID ==
131    * segID. Then, for second A-Z,0-9 series, the segID = 1A-1Z,10-19, and for the third series segID
132    * = 2A-2Z,20-29, and so on.
133    */
134   private final List<String> segIDs = new ArrayList<>();
135 
136   private final Map<Character, List<String>> segidMap = new HashMap<>();
137   /** Maps a chain to the number of insertion codes encountered in that chain. */
138   private final Map<Character, Integer> insertionCodeCount = new HashMap<>();
139   /**
140    * Maps chainIDResNumInsCode to renumbered chainIDResNum. For example, residue 52A in chain C might
141    * be renumbered to residue 53, and mapped as "C52A" to "C53".
142    */
143   private final Map<String, String> pdbToNewResMap = new HashMap<>();
144   /** List of modified residues * */
145   private final Map<String, String> modRes = new HashMap<>();
146   /** Keep track of ATOM record serial numbers to match them with ANISOU records. */
147   private final HashMap<Integer, Atom> atoms = new HashMap<>();
148 
149   private final Map<MolecularAssembly, BufferedReader> readers = new HashMap<>();
150   /** The current altLoc - i.e., the one we are defining a chemical system for. */
151   private Character currentAltLoc = 'A';
152   /** Character for the current chain ID. */
153   private Character currentChainID = null;
154   /** String for the current SegID. */
155   private String currentSegID = null;
156   /** Flag to indicate a mutation is requested. */
157   private boolean mutate = false;
158   private List<Mutation> mutations = null;
159   private List<Integer> resNumberList = null;
160   /** Flag to indicate if missing fields should be printed (i.e. missing B-factors). */
161   private boolean printMissingFields = true;
162   /** Number of symmetry operators when expanding to a P1 unit cell (-1 saves as current spacegroup). */
163   private int nSymOp = -1;
164   /**
165    * The serial field continues from the previous asymmetric unit when expanding to P1. This is not
166    * used when saving as the current spacegroup.
167    */
168   private int serialP1 = 0;
169   /** Assume current standard. */
170   private PDBFileStandard fileStandard = VERSION3_3;
171   /** If false, skip logging "Saving file". */
172   private boolean logWrites = true;
173   /** Keep track of the current MODEL in the file. */
174   private int modelsRead = 1;
175   /** Tracks output MODEL numbers. Unused if below zero. */
176   private int modelsWritten = -1;
177   /** Replicates vector dimensions if saving as expanded. */
178   private int[] lmn = new int[]{1,1,1};
179   /** Replicates vector along a-axis. */
180   private int l = 0;
181   /** Replicates vector along b-axis. */
182   private int m = 0;
183   /** Replicates vector along c-axis. */
184   private int n = 0;
185   private String versionFileName;
186 
187   private final File readFile;
188   private List<String> remarkLines = Collections.emptyList();
189   private double lastReadLambda = Double.NaN;
190 
191   /**
192    * If true, read in titratable residues in their fully protonated form.
193    */
194   private boolean constantPH = false;
195   /**
196    * If true, read in titratable residues in their protonated form.
197    */
198   private boolean rotamerTitration = false;
199   /**
200    * List of residue to rename for constantPH simulations.
201    */
202   private static final HashMap<AminoAcid3, AminoAcid3> constantPHResidueMap = new HashMap<>();
203 
204   static {
205     // Lysine
206     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
207     // Cysteine
208     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
209     // Histidine
210     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
211     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
212     // Aspartate
213     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASD);
214     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASH, AminoAcidUtils.AminoAcid3.ASD);
215     // Glutamate
216     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLD);
217     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLH, AminoAcidUtils.AminoAcid3.GLD);
218   }
219 
220   /**
221    * List of residue to rename for rotamer titration simulations.
222    */
223   private static final HashMap<AminoAcid3, AminoAcid3> rotamerResidueMap = new HashMap<>();
224 
225   static {
226     // Lysine
227     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
228     // Histidine
229     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
230     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
231     // Aspartate
232     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASH);
233     // Glutamate
234     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLH);
235     //Cysteine
236     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
237   }
238 
239   /**
240    * Constructor for PDBFilter.
241    *
242    * @param files a {@link java.util.List} object.
243    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
244    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
245    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
246    *     object.
247    */
248   public PDBFilter(List<File> files, MolecularAssembly molecularAssembly, ForceField forceField,
249       CompositeConfiguration properties) {
250     super(files, molecularAssembly, forceField, properties);
251     bondList = new ArrayList<>();
252     this.fileType = FileType.PDB;
253     readFile = files.get(0);
254   }
255 
256   /**
257    * Parse the PDB File from a URL.
258    *
259    * @param file a {@link java.io.File} object.
260    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
261    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
262    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
263    *     object.
264    */
265   public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
266       CompositeConfiguration properties) {
267     super(file, molecularAssembly, forceField, properties);
268     bondList = new ArrayList<>();
269     this.fileType = FileType.PDB;
270     readFile = file;
271   }
272 
273   /**
274    * Parse the PDB File from a URL.
275    *
276    * @param file a {@link java.io.File} object.
277    * @param molecularAssemblies a {@link java.util.List} object.
278    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
279    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
280    *     object.
281    */
282   public PDBFilter(File file, List<MolecularAssembly> molecularAssemblies, ForceField forceField,
283       CompositeConfiguration properties) {
284     super(file, molecularAssemblies, forceField, properties);
285     bondList = new ArrayList<>();
286     this.fileType = FileType.PDB;
287     readFile = file;
288   }
289 
290   /**
291    * Constructor for PDBFilter with residue numbers.
292    *
293    * @param file a {@link java.util.List} object.
294    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
295    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
296    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
297    *     object.
298    * @param resNumberList a List of integer residue numbers for constant pH rotamer
299    *     optimization.
300    */
301   public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
302       CompositeConfiguration properties, List<Integer> resNumberList) {
303     super(file, molecularAssembly, forceField, properties);
304     bondList = new ArrayList<>();
305     this.fileType = FileType.PDB;
306     this.readFile = file;
307     this.resNumberList = resNumberList;
308     //this.chainList = chainList;
309   }
310 
311 
312   /**
313    * Simple method useful for converting files to PDB format.
314    *
315    * @param atom a {@link ffx.potential.bonded.Atom} object.
316    * @return Returns a PDB ATOM or HETATM record for the passed Atom.
317    */
318   public static String toPDBAtomLine(Atom atom) {
319     StringBuilder sb;
320     if (atom.isHetero()) {
321       sb = new StringBuilder("HETATM");
322     } else {
323       sb = new StringBuilder("ATOM  ");
324     }
325     sb.append(repeat(" ", 74));
326 
327     String name = atom.getName();
328     if (name.length() > 4) {
329       name = name.substring(0, 4);
330     } else if (name.length() == 1) {
331       name = name + "  ";
332     } else if (name.length() == 2) {
333       name = name + " ";
334     }
335     int serial = atom.getXyzIndex();
336     sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
337 
338     Character altLoc = atom.getAltLoc();
339     if (altLoc != null) {
340       sb.setCharAt(16, altLoc);
341     } else {
342       char blankChar = ' ';
343       sb.setCharAt(16, blankChar);
344     }
345 
346     String resName = atom.getResidueName();
347     sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
348 
349     char chain = atom.getChainID();
350     sb.setCharAt(21, chain);
351 
352     int resID = atom.getResidueNumber();
353     sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
354 
355     double[] xyz = atom.getXYZ(null);
356     StringBuilder decimals = new StringBuilder();
357     for (int i = 0; i < 3; i++) {
358       try {
359         decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
360       } catch (IllegalArgumentException ex) {
361         String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
362         logger.info(
363             format(" XYZ coordinate %8.3f for atom %s overflowed PDB format and is truncated to %s.",
364                 xyz[i], atom, newValue));
365         decimals.append(newValue);
366       }
367     }
368     try {
369       decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
370     } catch (IllegalArgumentException ex) {
371       logger.severe(
372           format(" Occupancy %6.2f for atom %s must be between 0 and 1.", atom.getOccupancy(),
373               atom));
374     }
375     try {
376       decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
377     } catch (IllegalArgumentException ex) {
378       String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
379       logger.info(
380           format(" B-factor %6.2f for atom %s overflowed the PDB format and is truncated to %s.",
381               atom.getTempFactor(), atom, newValue));
382       decimals.append(newValue);
383     }
384 
385     sb.replace(30, 66, decimals.toString());
386     sb.replace(78, 80, format("%2d", 0));
387     sb.append("\n");
388     return sb.toString();
389   }
390 
391   public void setConstantPH(boolean constantPH) {
392     this.constantPH = constantPH;
393   }
394 
395   public void setRotamerTitration(boolean rotamerTitration) {
396     this.rotamerTitration = rotamerTitration;
397   }
398 
399   /** clearSegIDs */
400   public void clearSegIDs() {
401     segIDs.clear();
402   }
403 
404   /** {@inheritDoc} */
405   @Override
406   public void closeReader() {
407     for (MolecularAssembly system : systems) {
408       BufferedReader br = readers.get(system);
409       if (br != null) {
410         try {
411           br.close();
412         } catch (IOException ex) {
413           logger.warning(format(" Exception in closing system %s: %s", system.toString(), ex));
414         }
415       }
416     }
417   }
418 
419   @Override
420   public int countNumModels() {
421     Set<File> files = systems.stream().map(MolecularAssembly::getFile).map(File::toString).distinct()
422         .map(File::new).collect(Collectors.toSet());
423 
424     // Dangers of parallelism are minimized by: unique files/filenames, read-only access.
425     return files.parallelStream().mapToInt((File fi) -> {
426       int nModelsLocal = 0;
427       try (BufferedReader br = new BufferedReader(new FileReader(fi))) {
428         String line = br.readLine();
429         while (line != null) {
430           if (line.startsWith("MODEL")) {
431             ++nModelsLocal;
432           }
433           line = br.readLine();
434         }
435         nModelsLocal = Math.max(1, nModelsLocal);
436       } catch (IOException ex) {
437         logger.info(format(" Exception in parsing file %s: %s", fi, ex));
438       }
439       return nModelsLocal;
440     }).sum();
441   }
442 
443   /**
444    * Get the list of alternate locations encountered.
445    *
446    * @return the alternate location list.
447    */
448   public List<Character> getAltLocs() {
449     return altLocs;
450   }
451 
452   /** {@inheritDoc} */
453   @Override
454   public OptionalDouble getLastReadLambda() {
455     return Double.isNaN(lastReadLambda) ? OptionalDouble.empty() : OptionalDouble.of(lastReadLambda);
456   }
457 
458   /**
459    * Returns all the remark lines found by the last readFile call.
460    *
461    * @return Remark lines from the last readFile call.
462    */
463   @Override
464   public String[] getRemarkLines() {
465     int nRemarks = remarkLines.size();
466     return remarkLines.toArray(new String[nRemarks]);
467   }
468 
469   @Override
470   public int getSnapshot() {
471     return modelsRead;
472   }
473 
474   /**
475    * Mutate a residue at the PDB file is being parsed.
476    *
477    * @param chainID the Chain ID of the residue to mutate.
478    * @param resID the Residue ID of the residue to mutate.
479    * @param name the 3-letter code of the amino acid to mutate to.
480    */
481   public void mutate(Character chainID, int resID, String name) {
482     logger.info(format(" Mutating chain %c residue %d to %s.", chainID, resID, name));
483     mutate = true;
484     if (mutations == null) {
485       mutations = new ArrayList<>();
486     }
487     mutations.add(new Mutation(resID, chainID, name));
488   }
489 
490   /**
491    * mutate.
492    *
493    * @param mutations a {@link java.util.List} object.
494    */
495   public void mutate(List<Mutation> mutations) {
496     if (this.mutations == null) {
497       this.mutations = new ArrayList<>();
498     }
499     this.mutations.addAll(mutations);
500   }
501 
502   /** Parse the PDB File */
503   @Override
504   public boolean readFile() {
505     remarkLines = new ArrayList<>();
506     // First atom is #1, to match xyz file format
507     int xyzIndex = 1;
508     setFileRead(false);
509     systems.add(activeMolecularAssembly);
510 
511     List<String> conects = new ArrayList<>();
512     List<String> links = new ArrayList<>();
513     List<String> ssbonds = new ArrayList<>();
514     List<String> structs = new ArrayList<>();
515     try {
516       for (File file : files) {
517         currentFile = file;
518         if (mutate) {
519           List<Character> chainIDs = new ArrayList<>();
520           try (BufferedReader br = new BufferedReader(new FileReader(file))) {
521             String line = br.readLine();
522             while (line != null) {
523               // Replace all tabs w/ 4x spaces
524               line = line.replaceAll("\t", "    ");
525               String identity = line;
526               if (line.length() > 6) {
527                 identity = line.substring(0, 6);
528               }
529               identity = identity.trim().toUpperCase();
530               Record record;
531               try {
532                 record = Record.valueOf(identity);
533               } catch (Exception e) {
534                 // Continue until the record is recognized.
535                 line = br.readLine();
536                 continue;
537               }
538               switch (record) {
539                 case ANISOU, HETATM, ATOM -> {
540                   char c22 = line.charAt(21);
541                   boolean idFound = false;
542                   for (Character chainID : chainIDs) {
543                     if (c22 == chainID) {
544                       idFound = true;
545                       break;
546                     }
547                   }
548                   if (!idFound) {
549                     chainIDs.add(c22);
550                   }
551                 }
552               }
553               line = br.readLine();
554             }
555             for (Mutation mtn : mutations) {
556               if (!chainIDs.contains(mtn.chainChar)) {
557                 if (chainIDs.size() == 1) {
558                   logger.warning(
559                       format(" Chain ID %c for mutation not found: only one chain %c found.",
560                           mtn.chainChar, chainIDs.get(0)));
561                 } else {
562                   logger.warning(
563                       format(" Chain ID %c for mutation not found: mutation will not proceed.",
564                           mtn.chainChar));
565                 }
566               }
567             }
568           } catch (IOException ioException) {
569             logger.fine(format(" Exception %s in parsing file to find chain IDs", ioException));
570           }
571         }
572 
573         // Check that the current file exists and that we can read it.
574         if (currentFile == null || !currentFile.exists() || !currentFile.canRead()) {
575           return false;
576         }
577 
578         // Open the current file for parsing.
579         try (BufferedReader br = new BufferedReader(new FileReader(currentFile))) {
580           // Echo the alternate location being parsed.
581           if (currentAltLoc == 'A') {
582             logger.info(format(" Reading %s", currentFile.getName()));
583           } else {
584             logger.info(format(" Reading %s alternate location %s", currentFile.getName(), currentAltLoc));
585 
586           }
587           activeMolecularAssembly.setAlternateLocation(currentAltLoc);
588 
589           // Reset the current chain and segID.
590           currentChainID = null;
591           currentSegID = null;
592           boolean containsInsCode = false;
593 
594           // Read the first line of the file.
595           String line = br.readLine();
596 
597           // Parse until END or ENDMDL is found, or to the end of the file.
598           while (line != null) {
599             // Replace all tabs w/ 4x spaces
600             line = line.replaceAll("\t", "    ");
601             String identity = line;
602             if (line.length() > 6) {
603               identity = line.substring(0, 6);
604             }
605             identity = identity.trim().toUpperCase();
606             Record record;
607             try {
608               record = Record.valueOf(identity);
609             } catch (Exception e) {
610               // Continue until the record is recognized.
611               line = br.readLine();
612               continue;
613             }
614 
615             // Switch on the known record.
616             switch (record) {
617               case ENDMDL:
618               case END:
619                 // Setting "line" to null will exit the loop.
620                 line = null;
621                 continue;
622               case DBREF:
623 // =============================================================================
624 //  1 -  6       Record name   "DBREF "
625 //  8 - 11       IDcode        idCode             ID code of this entry.
626 // 13            Character     chainID            Chain identifier.
627 // 15 - 18       Integer       seqBegin           Initial sequence number of the
628 //                                                PDB sequence segment.
629 // 19            AChar         insertBegin        Initial insertion code of the
630 //                                                PDB sequence segment.
631 // 21 - 24       Integer       seqEnd             Ending sequence number of the
632 //                                                PDB sequence segment.
633 // 25            AChar         insertEnd          Ending insertion code of the
634 //                                                PDB sequence segment.
635 // 27 - 32       LString       database           Sequence database name.
636 // 34 - 41       LString       dbAccession        Sequence database accession code.
637 // 43 - 54       LString       dbIdCode           Sequence  database identification code.
638 // 56 - 60       Integer       dbseqBegin         Initial sequence number of the
639 //                                                database seqment.
640 // 61            AChar         idbnsBeg           Insertion code of initial residue of the
641 //                                                segment, if PDB is the reference.
642 // 63 - 67       Integer       dbseqEnd           Ending sequence number of the
643 //                                                database segment.
644 // 68            AChar         dbinsEnd           Insertion code of the ending residue of
645 //                                                the segment, if PDB is the reference.
646 // =============================================================================
647                 Character chainID = line.substring(12, 13).toUpperCase().charAt(0);
648                 int seqBegin = parseInt(line.substring(14, 18).trim());
649                 int seqEnd = parseInt(line.substring(20, 24).trim());
650                 int[] seqRange = dbRef.computeIfAbsent(chainID, k -> new int[2]);
651                 seqRange[0] = seqBegin;
652                 seqRange[1] = seqEnd;
653                 break;
654               case SEQRES:
655 // =============================================================================
656 //  1 -  6        Record name    "SEQRES"
657 //  8 - 10        Integer        serNum       Serial number of the SEQRES record for the
658 //                                            current  chain. Starts at 1 and increments
659 //                                            by one  each line. Reset to 1 for each chain.
660 // 12             Character      chainID      Chain identifier. This may be any single
661 //                                            legal  character, including a blank which is
662 //                                            is used if there is only one chain.
663 // 14 - 17        Integer        numRes       Number of residues in the chain.
664 //                                            This  value is repeated on every record.
665 // 20 - 22        Residue name   resName      Residue name.
666 // 24 - 26        Residue name   resName      Residue name.
667 // 28 - 30        Residue name   resName      Residue name.
668 // 32 - 34        Residue name   resName      Residue name.
669 // 36 - 38        Residue name   resName      Residue name.
670 // 40 - 42        Residue name   resName      Residue name.
671 // 44 - 46        Residue name   resName      Residue name.
672 // 48 - 50        Residue name   resName      Residue name.
673 // 52 - 54        Residue name   resName      Residue name.
674 // 56 - 58        Residue name   resName      Residue name.
675 // 60 - 62        Residue name   resName      Residue name.
676 // 64 - 66        Residue name   resName      Residue name.
677 // 68 - 70        Residue name   resName      Residue name.
678 // =============================================================================
679                 activeMolecularAssembly.addHeaderLine(line);
680                 chainID = line.substring(11, 12).toUpperCase().charAt(0);
681                 int serNum = parseInt(line.substring(7, 10).trim());
682                 String[] chain = seqRes.get(chainID);
683                 int numRes = parseInt(line.substring(13, 17).trim());
684                 if (chain == null) {
685                   chain = new String[numRes];
686                   seqRes.put(chainID, chain);
687                 }
688                 int resID = (serNum - 1) * 13;
689                 int end = line.length();
690                 for (int start = 19; start + 3 <= end; start += 4) {
691                   String res = line.substring(start, start + 3).trim();
692                   if (res.isEmpty()) {
693                     break;
694                   }
695                   chain[resID++] = res;
696                 }
697                 break;
698               case MODRES:
699                 String modResName = line.substring(12, 15).trim();
700                 String stdName = line.substring(24, 27).trim();
701                 modRes.put(modResName.toUpperCase(), stdName.toUpperCase());
702                 activeMolecularAssembly.addHeaderLine(line);
703 // =============================================================================
704 //  1 -  6        Record name     "MODRES"
705 //  8 - 11        IDcode          idCode         ID code of this entry.
706 // 13 - 15        Residue name    resName        Residue name used in this entry.
707 // 17             Character       chainID        Chain identifier.
708 // 19 - 22        Integer         seqNum         Sequence number.
709 // 23             AChar           iCode          Insertion code.
710 // 25 - 27        Residue name    stdRes         Standard residue name.
711 // 30 - 70        String          comment        Description of the residue modification.
712 // =============================================================================
713                 break;
714               case ANISOU:
715 // =============================================================================
716 //  1 - 6        Record name   "ANISOU"
717 //  7 - 11       Integer       serial         Atom serial number.
718 // 13 - 16       Atom          name           Atom name.
719 // 17            Character     altLoc         Alternate location indicator
720 // 18 - 20       Residue name  resName        Residue name.
721 // 22            Character     chainID        Chain identifier.
722 // 23 - 26       Integer       resSeq         Residue sequence number.
723 // 27            AChar         iCode          Insertion code.
724 // 29 - 35       Integer       u[0][0]        U(1,1)
725 // 36 - 42       Integer       u[1][1]        U(2,2)
726 // 43 - 49       Integer       u[2][2]        U(3,3)
727 // 50 - 56       Integer       u[0][1]        U(1,2)
728 // 57 - 63       Integer       u[0][2]        U(1,3)
729 // 64 - 70       Integer       u[1][2]        U(2,3)
730 // 77 - 78       LString(2)    element        Element symbol, right-justified.
731 // 79 - 80       LString(2)    charge         Charge on the atom.
732 // =============================================================================
733                 boolean deleteAnisou = properties.getBoolean("delete-anisou", false);
734                 double resetBfactors = properties.getDouble("reset-bfactors", -1.0);
735                 if (deleteAnisou || resetBfactors >= 0.0) {
736                   break;
737                 }
738                 Integer serial = Hybrid36.decode(5, line.substring(6, 11));
739                 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
740                 if (!altLocs.contains(altLoc)) {
741                   altLocs.add(altLoc);
742                 }
743                 if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
744                   break;
745                 }
746                 double[] adp = new double[6];
747                 adp[0] = parseInt(line.substring(28, 35).trim()) * 1.0e-4;
748                 adp[1] = parseInt(line.substring(35, 42).trim()) * 1.0e-4;
749                 adp[2] = parseInt(line.substring(42, 49).trim()) * 1.0e-4;
750                 adp[3] = parseInt(line.substring(49, 56).trim()) * 1.0e-4;
751                 adp[4] = parseInt(line.substring(56, 63).trim()) * 1.0e-4;
752                 adp[5] = parseInt(line.substring(63, 70).trim()) * 1.0e-4;
753                 if (atoms.containsKey(serial)) {
754                   Atom a = atoms.get(serial);
755                   a.setAltLoc(altLoc);
756                   a.setAnisou(adp);
757                 } else {
758                   logger.info(
759                       format(" No ATOM record for ANISOU serial number %d has been found.", serial));
760                   logger.info(format(" This ANISOU record will be ignored:\n %s", line));
761                 }
762                 break;
763               case ATOM:
764 // =============================================================================
765 //  1 -  6        Record name   "ATOM  "
766 //  7 - 11        Integer       serial       Atom serial number.
767 // 13 - 16        Atom          name         Atom name.
768 // 17             Character     altLoc       Alternate location indicator.
769 // 18 - 20        Residue name  resName      Residue name.
770 // 22             Character     chainID      Chain identifier.
771 // 23 - 26        Integer       resSeq       Residue sequence number.
772 // 27             AChar         iCode        Code for insertion of residues.
773 // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
774 // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
775 // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
776 // 55 - 60        Real(6.2)     occupancy    Occupancy.
777 // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
778 // 77 - 78        LString(2)    element      Element symbol, right-justified.
779 // 79 - 80        LString(2)    charge       Charge  on the atom.
780 // =============================================================================
781                 String name;
782                 String resName;
783                 String segID;
784                 int resSeq;
785                 boolean printAtom;
786                 double[] d;
787                 double occupancy;
788                 double tempFactor;
789                 Atom newAtom;
790                 Atom returnedAtom;
791                 // If it's a misnamed water, it will fall through to HETATM.
792                 if (!line.substring(17, 20).trim().equals("HOH")) {
793                   serial = Hybrid36.decode(5, line.substring(6, 11));
794                   name = line.substring(12, 16).trim();
795                   if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
796                       || name.toUpperCase().contains("3H")) {
797                     // VERSION3_2 is presently just a placeholder for "anything non-standard".
798                     fileStandard = VERSION3_2;
799                   }
800                   altLoc = line.substring(16, 17).toUpperCase().charAt(0);
801                   if (!altLocs.contains(altLoc)) {
802                     altLocs.add(altLoc);
803                   }
804                   if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
805                     break;
806                   }
807                   // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
808                   //  break;
809                   // }
810                   resName = line.substring(17, 20).trim();
811                   chainID = line.substring(21, 22).charAt(0);
812                   segID = getSegID(chainID);
813                   resSeq = Hybrid36.decode(4, line.substring(22, 26));
814 
815                   char insertionCode = line.charAt(26);
816                   if (insertionCode != ' ' && !containsInsCode) {
817                     containsInsCode = true;
818                     logger.warning(
819                         " FFX support for files with " + "insertion codes is experimental. "
820                             + "Residues will be renumbered to " + "eliminate insertion codes (52A "
821                             + "becomes 53, 53 becomes 54, etc)");
822                   }
823 
824                   int offset = insertionCodeCount.getOrDefault(chainID, 0);
825                   String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
826                   if (!pdbToNewResMap.containsKey(pdbResNum)) {
827                     if (insertionCode != ' ') {
828                       ++offset;
829                       insertionCodeCount.put(chainID, offset);
830                     }
831                     resSeq += offset;
832                     if (offset != 0) {
833                       logger.info(
834                           format(" Chain %c " + "residue %s-%s renumbered to %c %s-%d", chainID,
835                               pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
836                     }
837                     String newNum = format("%c%d", chainID, resSeq);
838                     pdbToNewResMap.put(pdbResNum, newNum);
839                   } else {
840                     resSeq += offset;
841                   }
842 
843                   printAtom = false;
844                   if (mutate) {
845                     boolean doBreak = false;
846                     for (Mutation mtn : mutations) {
847                       if (chainID == mtn.chainChar && resSeq == mtn.resID) {
848                         String atomName = name.toUpperCase();
849                         if (backboneNames.contains(atomName)) {
850                           printAtom = true;
851                           resName = mtn.resName;
852                         } else {
853                           logger.info(
854                               format(" Deleting atom %s of %s %d", atomName, resName, resSeq));
855                           doBreak = true;
856                           break;
857                         }
858                       }
859                     }
860                     if (doBreak) {
861                       break;
862                     }
863                   }
864 
865                   if (constantPH) {
866                     AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
867                     if (constantPHResidueMap.containsKey(aa3)) {
868                       String atomName = name.toUpperCase();
869                       AminoAcid3 aa3PH = constantPHResidueMap.get(aa3);
870                       resName = aa3PH.name();
871                       if (constantPhBackboneNames.contains(atomName)) {
872                         logger.info(format(" %s-%d %s", resName, resSeq, atomName));
873                       } else if (!atomName.startsWith("H")) {
874                         logger.info(format(" %s-%d %s", resName, resSeq, atomName));
875                       } else {
876                         logger.info(format(" %s-%d %s skipped", resName, resSeq, atomName));
877                         break;
878                       }
879                     }
880                   } else if (rotamerTitration) {
881                     AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
882                     if (rotamerResidueMap.containsKey(aa3) && resNumberList.contains(resSeq)) {
883                       AminoAcid3 aa3rotamer = rotamerResidueMap.get(aa3);
884                       resName = aa3rotamer.name();
885                     }
886                   }
887                   d = new double[3];
888                   d[0] = parseDouble(line.substring(30, 38).trim());
889                   d[1] = parseDouble(line.substring(38, 46).trim());
890                   d[2] = parseDouble(line.substring(46, 54).trim());
891                   occupancy = 1.0;
892                   tempFactor = 1.0;
893                   try {
894                     occupancy = parseDouble(line.substring(54, 60).trim());
895                     tempFactor = parseDouble(line.substring(60, 66).trim());
896                   } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
897                     // Use default values.
898                     if (printMissingFields) {
899                       logger.info(" Missing occupancy and b-factors set to 1.0.");
900                       printMissingFields = false;
901                     } else if (logger.isLoggable(Level.FINE)) {
902                       logger.fine(" Missing occupancy and b-factors set to 1.0.");
903                     }
904                   }
905 
906                   double bfactor = properties.getDouble("reset-bfactors", -1.0);
907                   if (bfactor >= 0.0) {
908                     tempFactor = bfactor;
909                   }
910 
911                   newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
912 
913                   // Check if this is a modified residue.
914                   if (modRes.containsKey(resName.toUpperCase())) {
915                     newAtom.setModRes(true);
916                   }
917                   returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
918                   if (returnedAtom != newAtom) {
919                     // A previously added atom has been retained.
920                     atoms.put(serial, returnedAtom);
921                     if (logger.isLoggable(Level.FINE)) {
922                       logger.fine(returnedAtom + " has been retained over\n" + newAtom);
923                     }
924                   } else {
925                     // The new atom has been added.
926                     atoms.put(serial, newAtom);
927                     // Check if the newAtom took the xyzIndex of a previous alternate conformer.
928                     if (newAtom.getIndex() == 0) {
929                       newAtom.setXyzIndex(xyzIndex++);
930                     }
931                     if (printAtom) {
932                       logger.info(newAtom.toString());
933                     }
934                   }
935                   break;
936                 }
937               case HETATM:
938 // =============================================================================
939 //  1 - 6        Record name    "HETATM"
940 //  7 - 11       Integer        serial        Atom serial number.
941 // 13 - 16       Atom           name          Atom name.
942 // 17            Character      altLoc        Alternate location indicator.
943 // 18 - 20       Residue name   resName       Residue name.
944 // 22            Character      chainID       Chain identifier.
945 // 23 - 26       Integer        resSeq        Residue sequence number.
946 // 27            AChar          iCode         Code for insertion of residues.
947 // 31 - 38       Real(8.3)      x             Orthogonal coordinates for X.
948 // 39 - 46       Real(8.3)      y             Orthogonal coordinates for Y.
949 // 47 - 54       Real(8.3)      z             Orthogonal coordinates for Z.
950 // 55 - 60       Real(6.2)      occupancy     Occupancy.
951 // 61 - 66       Real(6.2)      tempFactor    Temperature factor.
952 // 77 - 78       LString(2)     element       Element symbol; right-justified.
953 // 79 - 80       LString(2)     charge        Charge on the atom.
954 // =============================================================================
955                 serial = Hybrid36.decode(5, line.substring(6, 11));
956                 name = line.substring(12, 16).trim();
957                 altLoc = line.substring(16, 17).toUpperCase().charAt(0);
958                 if (!altLocs.contains(altLoc)) {
959                   altLocs.add(altLoc);
960                 }
961                 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
962                   break;
963                 }
964                 // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
965                 //  break;
966                 //}
967                 resName = line.substring(17, 20).trim();
968                 chainID = line.substring(21, 22).charAt(0);
969                 segID = getSegID(chainID);
970                 resSeq = Hybrid36.decode(4, line.substring(22, 26));
971 
972                 char insertionCode = line.charAt(26);
973                 if (insertionCode != ' ' && !containsInsCode) {
974                   containsInsCode = true;
975                   logger.warning(" FFX support for files with " + "insertion codes is experimental. "
976                       + "Residues will be renumbered to " + "eliminate insertion codes (52A "
977                       + "becomes 53, 53 becomes 54, etc)");
978                 }
979 
980                 int offset = insertionCodeCount.getOrDefault(chainID, 0);
981                 String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
982                 if (!pdbToNewResMap.containsKey(pdbResNum)) {
983                   if (insertionCode != ' ') {
984                     ++offset;
985                     insertionCodeCount.put(chainID, offset);
986                   }
987                   resSeq += offset;
988                   if (offset != 0) {
989                     logger.info(
990                         format(" Chain %c " + "molecule %s-%s renumbered to %c %s-%d", chainID,
991                             pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
992                   }
993                   String newNum = format("%c%d", chainID, resSeq);
994                   pdbToNewResMap.put(pdbResNum, newNum);
995                 } else {
996                   resSeq += offset;
997                 }
998 
999                 d = new double[3];
1000                 d[0] = parseDouble(line.substring(30, 38).trim());
1001                 d[1] = parseDouble(line.substring(38, 46).trim());
1002                 d[2] = parseDouble(line.substring(46, 54).trim());
1003                 occupancy = 1.0;
1004                 tempFactor = 1.0;
1005                 try {
1006                   occupancy = parseDouble(line.substring(54, 60).trim());
1007                   tempFactor = parseDouble(line.substring(60, 66).trim());
1008                 } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
1009                   // Use default values.
1010                   if (printMissingFields) {
1011                     logger.info(" Missing occupancy and b-factors set to 1.0.");
1012                     printMissingFields = false;
1013                   } else if (logger.isLoggable(Level.FINE)) {
1014                     logger.fine(" Missing occupancy and b-factors set to 1.0.");
1015                   }
1016                 }
1017 
1018                 double bfactor = properties.getDouble("reset-bfactors", -1.0);
1019                 if (bfactor >= 0.0) {
1020                   tempFactor = bfactor;
1021                 }
1022 
1023                 newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
1024                 newAtom.setHetero(true);
1025                 // Check if this is a modified residue.
1026                 if (modRes.containsKey(resName.toUpperCase())) {
1027                   newAtom.setModRes(true);
1028                 }
1029                 returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
1030                 if (returnedAtom != newAtom) {
1031                   // A previously added atom has been retained.
1032                   atoms.put(serial, returnedAtom);
1033                   if (logger.isLoggable(Level.FINE)) {
1034                     logger.fine(returnedAtom + " has been retained over\n" + newAtom);
1035                   }
1036                 } else {
1037                   // The new atom has been added.
1038                   atoms.put(serial, newAtom);
1039                   newAtom.setXyzIndex(xyzIndex++);
1040                 }
1041                 break;
1042               case CRYST1:
1043 // =============================================================================
1044 // The CRYST1 record presents the unit cell parameters, space group, and Z
1045 // value. If the structure was not determined by crystallographic means, CRYST1
1046 // simply provides the unitary values, with an appropriate REMARK.
1047 //
1048 //  7 - 15       Real(9.3)     a              a (Angstroms).
1049 // 16 - 24       Real(9.3)     b              b (Angstroms).
1050 // 25 - 33       Real(9.3)     c              c (Angstroms).
1051 // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1052 // 41 - 47       Real(7.2)     beta           beta (degrees).
1053 // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1054 // 56 - 66       LString       sGroup         Space  group.
1055 // 67 - 70       Integer       z              Z value.
1056 // =============================================================================
1057                 if (line.length() < 55) {
1058                   logger.severe(" CRYST1 record is improperly formatted.");
1059                 }
1060                 double aaxis = parseDouble(line.substring(6, 15).trim());
1061                 double baxis = parseDouble(line.substring(15, 24).trim());
1062                 double caxis = parseDouble(line.substring(24, 33).trim());
1063                 double alpha = parseDouble(line.substring(33, 40).trim());
1064                 double beta = parseDouble(line.substring(40, 47).trim());
1065                 double gamma = parseDouble(line.substring(47, 54).trim());
1066                 int limit = min(line.length(), 66);
1067                 String sg = line.substring(55, limit).trim();
1068                 properties.addProperty("a-axis", aaxis);
1069                 properties.addProperty("b-axis", baxis);
1070                 properties.addProperty("c-axis", caxis);
1071                 properties.addProperty("alpha", alpha);
1072                 properties.addProperty("beta", beta);
1073                 properties.addProperty("gamma", gamma);
1074                 properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1075                 break;
1076               case CONECT:
1077 // =============================================================================
1078 //  7 - 11        Integer        serial       Atom  serial number
1079 // 12 - 16        Integer        serial       Serial number of bonded atom
1080 // 17 - 21        Integer        serial       Serial number of bonded atom
1081 // 22 - 26        Integer        serial       Serial number of bonded atom
1082 // 27 - 31        Integer        serial       Serial number of bonded atom
1083 //
1084 // CONECT records involving atoms for which the coordinates are not present
1085 // in the entry (e.g., symmetry-generated) are not given.
1086 // CONECT records involving atoms for which the coordinates are missing due
1087 // to disorder, are also not provided.
1088 // =============================================================================
1089                 conects.add(line);
1090                 break;
1091               case LINK:
1092 // =============================================================================
1093 // The LINK records specify connectivity between residues that is not implied by
1094 // the primary structure. Connectivity is expressed in terms of the atom names.
1095 // They also include the distance associated with each linkage following the
1096 // symmetry operations at the end of each record.
1097 // 13 - 16         Atom           name1           Atom name.
1098 // 17              Character      altLoc1         Alternate location indicator.
1099 // 18 - 20         Residue name   resName1        Residue  name.
1100 // 22              Character      chainID1        Chain identifier.
1101 // 23 - 26         Integer        resSeq1         Residue sequence number.
1102 // 27              AChar          iCode1          Insertion code.
1103 // 43 - 46         Atom           name2           Atom name.
1104 // 47              Character      altLoc2         Alternate location indicator.
1105 // 48 - 50         Residue name   resName2        Residue name.
1106 // 52              Character      chainID2        Chain identifier.
1107 // 53 - 56         Integer        resSeq2         Residue sequence number.
1108 // 57              AChar          iCode2          Insertion code.
1109 // 60 - 65         SymOP          sym1            Symmetry operator atom 1.
1110 // 67 - 72         SymOP          sym2            Symmetry operator atom 2.
1111 // 74 – 78         Real(5.2)      Length          Link distance
1112 // =============================================================================
1113                 char a1 = line.charAt(16);
1114                 char a2 = line.charAt(46);
1115                 if (a1 != a2) {
1116                   // logger.info(format(" Ignoring LINK record as alternate locations do not match\n
1117                   // %s.", line));
1118                   break;
1119                 }
1120                 if (currentAltLoc == 'A') {
1121                   if ((a1 == ' ' || a1 == 'A') && (a2 == ' ' || a2 == 'A')) {
1122                     links.add(line);
1123                   }
1124                 } else if (a1 == currentAltLoc && a2 == currentAltLoc) {
1125                   links.add(line);
1126                 }
1127                 break;
1128               case SSBOND:
1129 // =============================================================================
1130 // The SSBOND record identifies each disulfide bond in protein and polypeptide
1131 // structures by identifying the two residues involved in the bond.
1132 // The disulfide bond distance is included after the symmetry operations at
1133 // the end of the SSBOND record.
1134 //
1135 //  8 - 10        Integer         serNum       Serial number.
1136 // 12 - 14        LString(3)      "CYS"        Residue name.
1137 // 16             Character       chainID1     Chain identifier.
1138 // 18 - 21        Integer         seqNum1      Residue sequence number.
1139 // 22             AChar           icode1       Insertion code.
1140 // 26 - 28        LString(3)      "CYS"        Residue name.
1141 // 30             Character       chainID2     Chain identifier.
1142 // 32 - 35        Integer         seqNum2      Residue sequence number.
1143 // 36             AChar           icode2       Insertion code.
1144 // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
1145 // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
1146 // 74 – 78        Real(5.2)      Length        Disulfide bond distance
1147 //
1148 // If SG of cysteine is disordered then there are possible alternate linkages.
1149 // wwPDB practice is to put together all possible SSBOND records. This is
1150 // problematic because the alternate location identifier is not specified in
1151 // the SSBOND record.
1152 //
1153 // Notes:
1154 // SSBOND records may be invalid if chain IDs are reused.
1155 // SSBOND records are applied by FFX to all conformers.
1156 // =============================================================================
1157                 ssbonds.add(line);
1158                 break;
1159               case HELIX:
1160 // =============================================================================
1161 // HELIX records are used to identify the position of helices in the molecule.
1162 // Helices are named, numbered, and classified by type. The residues where the
1163 // helix begins and ends are noted, as well as the total length.
1164 //
1165 //  8 - 10        Integer        serNum        Serial number of the helix. This starts
1166 //                                             at 1  and increases incrementally.
1167 // 12 - 14        LString(3)     helixID       Helix  identifier. In addition to a serial
1168 //                                             number, each helix is given an
1169 //                                             alphanumeric character helix identifier.
1170 // 16 - 18        Residue name   initResName   Name of the initial residue.
1171 // 20             Character      initChainID   Chain identifier for the chain containing
1172 //                                             this  helix.
1173 // 22 - 25        Integer        initSeqNum    Sequence number of the initial residue.
1174 // 26             AChar          initICode     Insertion code of the initial residue.
1175 // 28 - 30        Residue  name  endResName    Name of the terminal residue of the helix.
1176 // 32             Character      endChainID    Chain identifier for the chain containing
1177 //                                             this  helix.
1178 // 34 - 37        Integer        endSeqNum     Sequence number of the terminal residue.
1179 // 38             AChar          endICode      Insertion code of the terminal residue.
1180 // 39 - 40        Integer        helixClass    Helix class (see below).
1181 // 41 - 70        String         comment       Comment about this helix.
1182 // 72 - 76        Integer        length        Length of this helix.
1183 //
1184 //                                      CLASS NUMBER
1185 // TYPE OF  HELIX                     (COLUMNS 39 - 40)
1186 // --------------------------------------------------------------
1187 // Right-handed alpha (default)                1
1188 // Right-handed omega                          2
1189 // Right-handed pi                             3
1190 // Right-handed gamma                          4
1191 // Right-handed 3 - 10                         5
1192 // Left-handed alpha                           6
1193 // Left-handed omega                           7
1194 // Left-handed gamma                           8
1195 // 2 - 7 ribbon/helix                          9
1196 // Polyproline                                10
1197 // =============================================================================
1198               case SHEET:
1199 // =============================================================================
1200 // SHEET records are used to identify the position of sheets in the molecule.
1201 // Sheets are both named and numbered. The residues where the sheet begins and
1202 // ends are noted.
1203 //
1204 //  8 - 10        Integer       strand         Strand  number which starts at 1 for each
1205 //                                             strand within a sheet and increases by one.
1206 // 12 - 14        LString(3)    sheetID        Sheet  identifier.
1207 // 15 - 16        Integer       numStrands     Number  of strands in sheet.
1208 // 18 - 20        Residue name  initResName    Residue  name of initial residue.
1209 // 22             Character     initChainID    Chain identifier of initial residue in strand.
1210 // 23 - 26        Integer       initSeqNum     Sequence number of initial residue in strand.
1211 // 27             AChar         initICode      Insertion code of initial residue in  strand.
1212 // 29 - 31        Residue name  endResName     Residue name of terminal residue.
1213 // 33             Character     endChainID     Chain identifier of terminal residue.
1214 // 34 - 37        Integer       endSeqNum      Sequence number of terminal residue.
1215 // 38             AChar         endICode       Insertion code of terminal residue.
1216 // 39 - 40        Integer       sense          Sense of strand with respect to previous
1217 //                                             strand in the sheet. 0 if first strand,
1218 //                                             1 if  parallel,and -1 if anti-parallel.
1219 // 42 - 45        Atom          curAtom        Registration.  Atom name in current strand.
1220 // 46 - 48        Residue name  curResName     Registration.  Residue name in current strand
1221 // 50             Character     curChainId     Registration. Chain identifier in current strand.
1222 // 51 - 54        Integer       curResSeq      Registration.  Residue sequence number
1223 //                                             in current strand.
1224 // 55             AChar         curICode       Registration. Insertion code in current strand.
1225 // 57 - 60        Atom          prevAtom       Registration.  Atom name in previous strand.
1226 // 61 - 63        Residue name  prevResName    Registration.  Residue name in previous strand.
1227 // 65             Character     prevChainId    Registration.  Chain identifier in previous strand.
1228 // 66 - 69        Integer       prevResSeq     Registration. Residue sequence number
1229 //                                             in previous strand.
1230 // 70             AChar         prevICode      Registration.  Insertion code in
1231 //                                             previous strand.
1232 // =============================================================================
1233                 structs.add(line);
1234                 break;
1235               case MODEL: // Currently, no handling in initial read.
1236                 break;
1237               case MTRIX1:
1238 // ================================================================================
1239 // MTRIXn (n = 1, 2, or 3) records present transformations expressing
1240 // non-crystallographic symmetry.
1241 // MTRIXn will appear only when such transformations are required to generate an
1242 // entire asymmetric unit,
1243 // such as a large viral structure.
1244 //
1245 //  8 - 10        Integer       serial         Serial number.
1246 // 11 - 20        Real(10.6)    m[n][1]        Mn1
1247 // 21 - 30        Real(10.6)    m[n][2]        Mn2
1248 // 31 - 40        Real(10.6)    m[n][3]        Mn3
1249 // 21 - 30        Real(10.6)    v[n]           Vn
1250 // 60             Integer       iGiven         1 if coordinates for the representations which are
1251 //                                              approximately related by the transformations of the
1252 //                                              molecule are contained in the entry. Otherwise, blank.
1253 // =================================================================================
1254                 StringBuilder MTRX1 = new StringBuilder(line.substring(11, 55));
1255                 properties.addProperty("MTRIX1", MTRX1);
1256                 break;
1257               case MTRIX2:
1258                 StringBuilder MTRX2 = new StringBuilder(line.substring(11, 55));
1259                 properties.addProperty("MTRIX2", MTRX2);
1260                 break;
1261               case MTRIX3:
1262                 StringBuilder MTRX3 = new StringBuilder(line.substring(11, 55));
1263                 properties.addProperty("MTRIX3", MTRX3);
1264                 break;
1265               case REMARK:
1266                 remarkLines.add(line.trim());
1267                 if (line.contains("Lambda:")) {
1268                   Matcher m = lambdaPattern.matcher(line);
1269                   if (m.find()) {
1270                     lastReadLambda = Double.parseDouble(m.group(1));
1271                   }
1272                 }
1273 // =================================================================================
1274 // REMARK 350: presents all transformations, both crystallographic and non-crystallographic,
1275 // needed to generate the biomolecule. These transformations operate on the coordinates in the
1276 // entry. Both author and computational descriptions of assemblies are provided, if applicable.
1277 // For strict ncs case where more than one assembly presents in asymmetric unit, only one
1278 // chain with unit matrix will reported in REMARK 350, the other chain will be generated
1279 // by rotation and translation.
1280 //
1281 // 20 - 23        Integer       serial         Serial number.
1282 // 24 - 33        Real(10.6)    m[n][1]        Mn1
1283 // 34 - 43        Real(10.6)    m[n][2]        Mn2
1284 // 44 - 53        Real(10.6)    m[n][3]        Mn3
1285 // 59 - 68        Real(10.6)    v[n]           Vn
1286 // =================================================================================
1287                 if (line.length() >= 68) {
1288                   String remarkType = line.substring(7, 10).trim();
1289                   if (remarkType.matches("\\d+") && parseInt(remarkType) == 350 && line.substring(13,
1290                       18).equalsIgnoreCase("BIOMT")) {
1291                     properties.addProperty("BIOMTn", new StringBuilder(line.substring(24, 68)));
1292                   }
1293                 }
1294                 break;
1295               default:
1296                 break;
1297             }
1298             line = br.readLine();
1299           }
1300 
1301         } catch (FileNotFoundException fileNotFoundException) {
1302           logger.log(Level.SEVERE, " PDB file not found", fileNotFoundException);
1303         }
1304       }
1305       xyzIndex--;
1306       setFileRead(true);
1307     } catch (IOException e) {
1308       logger.exiting(PDBFilter.class.getName(), "readFile", e);
1309       return false;
1310     }
1311 
1312     // Locate disulfide bonds; bond parameters are assigned below.
1313     List<Bond> ssBondList = locateDisulfideBonds(ssbonds, activeMolecularAssembly, pdbToNewResMap);
1314 
1315     // Record the number of atoms read in from the PDB file before applying
1316     // algorithms that may build new atoms.
1317     int pdbAtoms = activeMolecularAssembly.getAtomArray().length;
1318 
1319     // Build missing backbone atoms in loops.
1320     buildMissingResidues(xyzIndex, activeMolecularAssembly, seqRes, dbRef);
1321 
1322     // Assign atom types. Missing side-chains atoms and missing hydrogen will be built in.
1323     bondList = assignAtomTypes(activeMolecularAssembly, fileStandard);
1324 
1325     // Assign disulfide bonds parameters and log their creation.
1326     buildDisulfideBonds(ssBondList, activeMolecularAssembly, bondList);
1327 
1328     // Finally, re-number the atoms if missing atoms were created.
1329     int currentN = activeMolecularAssembly.getAtomArray().length;
1330     if (pdbAtoms != currentN) {
1331       logger.info(format(" Renumbering PDB file due to built atoms (%d vs %d)", currentN, pdbAtoms));
1332       numberAtoms(activeMolecularAssembly);
1333     }
1334     return true;
1335   }
1336 
1337   /** {@inheritDoc} */
1338   @Override
1339   public boolean readNext() {
1340     return readNext(false);
1341   }
1342 
1343   /** {@inheritDoc} */
1344   @Override
1345   public boolean readNext(boolean resetPosition) {
1346     return readNext(resetPosition, false);
1347   }
1348 
1349   /** {@inheritDoc} */
1350   @Override
1351   public boolean readNext(boolean resetPosition, boolean print) {
1352     return readNext(resetPosition, print, true);
1353   }
1354 
1355   /** {@inheritDoc} */
1356   @Override
1357   public boolean readNext(boolean resetPosition, boolean print, boolean parse) {
1358     modelsRead = resetPosition ? 1 : modelsRead + 1;
1359     if (!parse) {
1360       if (print) {
1361         logger.info(format(" Skipped Model %d.", modelsRead));
1362       }
1363       return true;
1364     }
1365     remarkLines = new ArrayList<>(remarkLines.size());
1366     // ^ is beginning of line, \\s+ means "one or more whitespace", (\\d+) means match and capture
1367     // one or more digits.
1368     Pattern modelPatt = Pattern.compile("^MODEL\\s+(\\d+)");
1369     boolean eof = true;
1370     for (MolecularAssembly system : systems) {
1371       try {
1372         BufferedReader currentReader;
1373         if (readers.containsKey(system)) {
1374           currentReader = readers.get(system);
1375           try {
1376             if (!currentReader.ready()) {
1377               currentReader = new BufferedReader(new FileReader(readFile));
1378               // Mark the start of the file.
1379               currentReader.mark(0);
1380               readers.remove(system);
1381               readers.put(system, currentReader);
1382             } else if (resetPosition) {
1383               // If the BufferedReader has been opened, and reset is requested, reset the position.
1384               currentReader.reset();
1385             }
1386           } catch (Exception exception) {
1387             // If all structures in the PDB file have been read, the currentReader may have closed.
1388             // The try block will catch this case and reset to the beginning of the file.
1389             currentReader = new BufferedReader(new FileReader(readFile));
1390             // Mark the start of the file.
1391             currentReader.mark(0);
1392             readers.remove(system);
1393             readers.put(system, currentReader);
1394           }
1395         } else {
1396           currentReader = new BufferedReader(new FileReader(readFile));
1397           // Mark the start of the file.
1398           currentReader.mark(0);
1399           readers.put(system, currentReader);
1400         }
1401 
1402         // Skip to appropriate model.
1403         String line = currentReader.readLine();
1404         while (line != null) {
1405           line = line.trim();
1406           Matcher m = modelPatt.matcher(line);
1407           if (m.find()) {
1408             int modelNum = parseInt(m.group(1));
1409             if (modelNum == modelsRead) {
1410               if (print) {
1411                 logger.log(Level.INFO, format(" Reading model %d for %s", modelNum, currentFile));
1412               }
1413               eof = false;
1414               break;
1415             }
1416           }
1417           line = currentReader.readLine();
1418         }
1419         if (eof) {
1420           if (logger.isLoggable(Level.FINEST)) {
1421             logger.log(Level.FINEST, format("\n End of file reached for %s", readFile));
1422           }
1423           currentReader.close();
1424           return false;
1425         }
1426 
1427         // Begin parsing the model.
1428         boolean modelDone = false;
1429         line = currentReader.readLine();
1430         while (line != null) {
1431           line = line.trim();
1432           String recID = line.substring(0, Math.min(6, line.length())).trim();
1433           try {
1434             Record record = Record.valueOf(recID);
1435             boolean hetatm = true;
1436             switch (record) {
1437               // =============================================================================
1438               //
1439               //  7 - 11        Integer       serial       Atom serial number.
1440               // 13 - 16        Atom          name         Atom name.
1441               // 17             Character     altLoc       Alternate location indicator.
1442               // 18 - 20        Residue name  resName      Residue name.
1443               // 22             Character     chainID      Chain identifier.
1444               // 23 - 26        Integer       resSeq       Residue sequence number.
1445               // 27             AChar         iCode        Code for insertion of residues.
1446               // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in
1447               // Angstroms.
1448               // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in
1449               // Angstroms.
1450               // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in
1451               // Angstroms.
1452               // 55 - 60        Real(6.2)     occupancy    Occupancy.
1453               // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
1454               // 77 - 78        LString(2)    element      Element symbol, right-justified.
1455               // 79 - 80        LString(2)    charge       Charge  on the atom.
1456               // =============================================================================
1457               //         1         2         3         4         5         6         7
1458               // 123456789012345678901234567890123456789012345678901234567890123456789012345678
1459               // ATOM      1  N   ILE A  16      60.614  71.140 -10.592  1.00  7.38           N
1460               // ATOM      2  CA  ILE A  16      60.793  72.149  -9.511  1.00  6.91           C
1461               case ATOM:
1462                 hetatm = false;
1463               case HETATM:
1464                 String name = line.substring(12, 16).trim();
1465                 if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
1466                     || name.toUpperCase().contains("3H")) {
1467                   // VERSION3_2 is presently just a placeholder for "anything non-standard".
1468                   fileStandard = VERSION3_2;
1469                 }
1470                 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
1471                 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
1472                   break;
1473                 }
1474                 // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
1475                 //  break;
1476                 // }
1477                 String resName = line.substring(17, 20).trim();
1478                 Character chainID = line.substring(21, 22).charAt(0);
1479                 String segID = getExistingSegID(chainID);
1480                 int resSeq = Hybrid36.decode(4, line.substring(22, 26));
1481                 double[] d = new double[3];
1482                 d[0] = parseDouble(line.substring(30, 38).trim());
1483                 d[1] = parseDouble(line.substring(38, 46).trim());
1484                 d[2] = parseDouble(line.substring(46, 54).trim());
1485                 double occupancy = 1.0;
1486                 double tempFactor = 1.0;
1487                 Atom newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy,
1488                     tempFactor, segID);
1489                 newAtom.setHetero(hetatm);
1490                 // Check if this is a modified residue.
1491                 if (modRes.containsKey(resName.toUpperCase())) {
1492                   newAtom.setModRes(true);
1493                 }
1494 
1495                 Atom returnedAtom = activeMolecularAssembly.findAtom(newAtom);
1496                 if (returnedAtom != null) {
1497                   returnedAtom.setXYZ(d);
1498                   double[] retXYZ = new double[3];
1499                   returnedAtom.getXYZ(retXYZ);
1500                 } else {
1501                   String message = format(" Could not find atom %s in assembly", newAtom);
1502                   if (dieOnMissingAtom) {
1503                     logger.severe(message);
1504                   } else {
1505                     logger.warning(message);
1506                   }
1507                 }
1508                 break;
1509               case CRYST1:
1510                 // =============================================================================
1511                 // The CRYST1 record presents the unit cell parameters, space group, and Z
1512                 // value. If the structure was not determined by crystallographic means, CRYST1
1513                 // simply provides the unitary values, with an appropriate REMARK.
1514                 //
1515                 //  7 - 15       Real(9.3)     a              a (Angstroms).
1516                 // 16 - 24       Real(9.3)     b              b (Angstroms).
1517                 // 25 - 33       Real(9.3)     c              c (Angstroms).
1518                 // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1519                 // 41 - 47       Real(7.2)     beta           beta (degrees).
1520                 // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1521                 // 56 - 66       LString       sGroup         Space  group.
1522                 // 67 - 70       Integer       z              Z value.
1523                 // =============================================================================
1524                 logger.info(" Crystal record found.");
1525                 if (line.length() < 55) {
1526                   logger.severe(" CRYST1 record is improperly formatted.");
1527                 }
1528                 double aaxis = parseDouble(line.substring(6, 15).trim());
1529                 double baxis = parseDouble(line.substring(15, 24).trim());
1530                 double caxis = parseDouble(line.substring(24, 33).trim());
1531                 double alpha = parseDouble(line.substring(33, 40).trim());
1532                 double beta = parseDouble(line.substring(40, 47).trim());
1533                 double gamma = parseDouble(line.substring(47, 54).trim());
1534                 int limit = min(line.length(), 66);
1535                 String sg = line.substring(55, limit).trim();
1536 //                properties.clearProperty("a-axis");
1537 //                properties.clearProperty("b-axis");
1538 //                properties.clearProperty("c-axis");
1539 //                properties.clearProperty("alpha");
1540 //                properties.clearProperty("beta");
1541 //                properties.clearProperty("gamma");
1542 //                properties.clearProperty("spacegroup");
1543 //
1544 //                properties.addProperty("a-axis", aaxis);
1545 //                properties.addProperty("b-axis", baxis);
1546 //                properties.addProperty("c-axis", caxis);
1547 //                properties.addProperty("alpha", alpha);
1548 //                properties.addProperty("beta", beta);
1549 //                properties.addProperty("gamma", gamma);
1550 //                properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1551                 Crystal crystal = activeMolecularAssembly.getCrystal();
1552                 SpaceGroup spaceGroup = SpaceGroupDefinitions.spaceGroupFactory(sg);
1553                 if (Objects.equals(crystal.spaceGroup.shortName, spaceGroup.shortName)) {
1554                   crystal.changeUnitCellParameters(aaxis, baxis, caxis, alpha, beta, gamma);
1555                 } else {
1556                   // TODO: Handle changes in space groups... Means recalculating force field terms.
1557                   logger.warning(format(" Original space group %s could not be changed to %s",
1558                       crystal.spaceGroup.shortName, spaceGroup.shortName));
1559                 }
1560                 break;
1561               case ENDMDL:
1562               case END: // Technically speaking, END should be at the end of the file, not end of
1563                 // the model.
1564                 logger.log(Level.FINE, format(" Model %d successfully read", modelsRead));
1565                 modelDone = true;
1566                 break;
1567               case REMARK:
1568                 remarkLines.add(line.trim());
1569                 if (line.contains("Lambda:")) {
1570                   Matcher m = lambdaPattern.matcher(line);
1571                   if (m.find()) {
1572                     lastReadLambda = Double.parseDouble(m.group(1));
1573                   }
1574                 }
1575                 break;
1576               default:
1577                 break;
1578             }
1579           } catch (Exception ex) {
1580             // Do nothing; it's not an ATOM/HETATM line.
1581           }
1582           if (modelDone) {
1583             break;
1584           }
1585           line = currentReader.readLine();
1586         }
1587         return true;
1588       } catch (IOException ex) {
1589         logger.info(
1590             format(" Exception in parsing frame %d of %s:" + " %s", modelsRead, system.toString(),
1591                 ex));
1592       }
1593     }
1594     return false;
1595   }
1596 
1597   /**
1598    * Specify the alternate location.
1599    *
1600    * @param molecularAssembly The MolecularAssembly to populate.
1601    * @param altLoc The alternate location to use.
1602    */
1603   public void setAltID(MolecularAssembly molecularAssembly, Character altLoc) {
1604     setMolecularSystem(molecularAssembly);
1605     currentAltLoc = altLoc;
1606   }
1607 
1608   /**
1609    * Sets whether this PDBFilter should log each time it saves to a file.
1610    *
1611    * @param logWrites a boolean.
1612    */
1613   public void setLogWrites(boolean logWrites) {
1614     this.logWrites = logWrites;
1615   }
1616 
1617   /**
1618    * setModelNumbering.
1619    *
1620    * @param modelsWritten the number of models written.
1621    */
1622   public void setModelNumbering(int modelsWritten) {
1623     this.modelsWritten = modelsWritten;
1624   }
1625 
1626   public void setLMN(int[] lmn) {
1627     if(lmn[0] >= 1 && lmn[1] >= 1 && lmn[2] >= 1){
1628       this.lmn = lmn;
1629     }else{
1630       // Provided dimensions are not handled. Revert to P1.
1631       this.lmn = new int[]{1,1,1};
1632     }
1633   }
1634 
1635   /**
1636    * setSymOp.
1637    *
1638    * @param symOp a int.
1639    */
1640   public void setSymOp(int symOp) {
1641     this.nSymOp = symOp;
1642   }
1643 
1644   /**
1645    * Expand the current system to P1 during the save operation.
1646    *
1647    * @param file The file to write.
1648    * @return Return true on a successful write.
1649    */
1650   public boolean writeFileAsP1(File file) {
1651 
1652     Crystal crystal = activeMolecularAssembly.getCrystal();
1653     int nSymOps = crystal.getUnitCell().spaceGroup.getNumberOfSymOps();
1654 
1655     if (nSymOps == 1) {
1656       // This is a P1 system.
1657       if (!writeFile(file, false)) {
1658         logger.info(format(" Save failed for %s", activeMolecularAssembly));
1659         return false;
1660       } else {
1661         return true;
1662       }
1663     } else {
1664       Polymer[] polymers = activeMolecularAssembly.getChains();
1665       int chainCount = 0;
1666       for (Polymer polymer : polymers) {
1667         Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1668         polymer.setChainID(chainID);
1669         polymer.setSegID(chainID.toString());
1670       }
1671       nSymOp = 0;
1672       logWrites = false;
1673       boolean writeEnd = false;
1674       if (!writeFile(file, false, false, writeEnd)) {
1675         logger.info(format(" Save failed for %s", activeMolecularAssembly));
1676         return false;
1677       } else {
1678         for (int i = 1; i < nSymOps; i++) {
1679           nSymOp = i;
1680           for (Polymer polymer : polymers) {
1681             Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1682             polymer.setChainID(chainID);
1683             polymer.setSegID(chainID.toString());
1684           }
1685           writeEnd = i == nSymOps - 1;
1686           if (!writeFile(file, true, false, writeEnd)) {
1687             logger.info(format(" Save failed for %s", activeMolecularAssembly));
1688             return false;
1689           }
1690         }
1691       }
1692 
1693       // Reset the chainIDs.
1694       chainCount = 0;
1695       for (Polymer polymer : polymers) {
1696         Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1697         polymer.setChainID(chainID);
1698         polymer.setSegID(chainID.toString());
1699       }
1700 
1701     }
1702 
1703     return true;
1704   }
1705 
1706   /**
1707    * writeFile
1708    *
1709    * @param saveFile a {@link java.io.File} object.
1710    * @param append Whether to append to saveFile (vs over-write).
1711    * @param printLinear Ignored (remains to present a different method signature).
1712    * @param writeEnd True if this is the final model.
1713    * @return Success of writing.
1714    */
1715   public boolean writeFile(File saveFile, boolean append, boolean printLinear, boolean writeEnd) {
1716     return writeFile(saveFile, append, Collections.emptySet(), writeEnd, true);
1717   }
1718 
1719   /**
1720    * writeFile
1721    *
1722    * @param saveFile a {@link java.io.File} object to save to.
1723    * @param append Whether to append to saveFile (vs over-write).
1724    * @param toExclude A {@link java.util.Set} of {@link ffx.potential.bonded.Atom}s to exclude
1725    *     from writing.
1726    * @param writeEnd True if this is the final model.
1727    * @param versioning True if the file being saved to should be versioned. False if the file
1728    *     being saved to should be overwritten.
1729    * @return Success of writing.
1730    */
1731   public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1732       boolean versioning) {
1733     return writeFile(saveFile, append, toExclude, writeEnd, versioning, null);
1734   }
1735 
1736   /**
1737    * writeFile
1738    *
1739    * @param saveFile a {@link java.io.File} object to save to.
1740    * @param append Whether to append to saveFile (vs over-write).
1741    * @param toExclude A {@link java.util.Set} of {@link ffx.potential.bonded.Atom}s to exclude
1742    *     from writing.
1743    * @param writeEnd True if this is the final model.
1744    * @param versioning True if the file being saved to should be versioned. False if the file
1745    *     being saved to should be overwritten.
1746    * @param extraLines Extra comment/header lines to write.
1747    * @return Success of writing.
1748    */
1749   public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1750       boolean versioning, String[] extraLines) {
1751     if (standardizeAtomNames) {
1752       logger.info(" Setting atom names to PDB standard.");
1753       renameAtomsToPDBStandard(activeMolecularAssembly);
1754     }
1755     final Set<Atom> atomExclusions = toExclude == null ? Collections.emptySet() : toExclude;
1756     if (saveFile == null) {
1757       return false;
1758     }
1759     if (vdwH) {
1760       logger.info(" Saving hydrogen to van der Waals centers instead of nuclear locations.");
1761     }
1762     if (nSymOp > -1) {
1763       logger.info(format(" Saving atoms using the symmetry operator:\n%s\n",
1764           activeMolecularAssembly.getCrystal().getUnitCell().spaceGroup.getSymOp(nSymOp)
1765               .toString()));
1766     }
1767 
1768     // Create StringBuilders for ATOM, ANISOU and TER records that can be reused.
1769     StringBuilder sb = new StringBuilder("ATOM  ");
1770     StringBuilder anisouSB = new StringBuilder("ANISOU");
1771     StringBuilder terSB = new StringBuilder("TER   ");
1772     StringBuilder model = null;
1773     for (int i = 6; i < 80; i++) {
1774       sb.append(' ');
1775       anisouSB.append(' ');
1776       terSB.append(' ');
1777     }
1778 
1779     File newFile = saveFile;
1780     if (!append) {
1781       if (versioning) {
1782         newFile = version(saveFile);
1783       }
1784     } else if (modelsWritten >= 0) {
1785       model = new StringBuilder(format("MODEL     %-4d", ++modelsWritten));
1786       model.append(repeat(" ", 65));
1787     }
1788     activeMolecularAssembly.setFile(newFile);
1789     if (activeMolecularAssembly.getName() == null) {
1790       activeMolecularAssembly.setName(newFile.getName());
1791     }
1792     if (logWrites) {
1793       logger.log(Level.INFO, " Saving {0}", newFile.getName());
1794     }
1795 
1796     try (FileWriter fw = new FileWriter(newFile, append);
1797         BufferedWriter bw = new BufferedWriter(fw)) {
1798       /*
1799        Will come before CRYST1 and ATOM records, but after anything
1800        written by writeFileWithHeader (particularly X-ray refinement statistics).
1801       */
1802       String[] headerLines = activeMolecularAssembly.getHeaderLines();
1803       for (String line : headerLines) {
1804         bw.write(format("%s\n", line));
1805       }
1806       if (extraLines != null) {
1807         if (rotamerTitration && extraLines[0].contains("REMARK")) {
1808           for (String line : extraLines) {
1809             bw.write(line + "\n");
1810           }
1811         } else {
1812           for (String line : extraLines) {
1813             bw.write(format("REMARK 999 %s\n", line));
1814           }
1815         }
1816       }
1817       if (model != null) {
1818         bw.write(model.toString());
1819         bw.newLine();
1820       }
1821       // =============================================================================
1822       // The CRYST1 record presents the unit cell parameters, space group, and Z
1823       // value. If the structure was not determined by crystallographic means, CRYST1
1824       // simply provides the unitary values, with an appropriate REMARK.
1825       //
1826       //  7 - 15       Real(9.3)     a              a (Angstroms).
1827       // 16 - 24       Real(9.3)     b              b (Angstroms).
1828       // 25 - 33       Real(9.3)     c              c (Angstroms).
1829       // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1830       // 41 - 47       Real(7.2)     beta           beta (degrees).
1831       // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1832       // 56 - 66       LString       sGroup         Space  group.
1833       // 67 - 70       Integer       z              Z value.
1834       // =============================================================================
1835       if (nSymOp < 0) {
1836         // Write out the unit cell.
1837         Crystal crystal = activeMolecularAssembly.getCrystal();
1838         if (crystal != null && !crystal.aperiodic()) {
1839           Crystal c = crystal.getUnitCell();
1840           bw.write(c.toCRYST1());
1841         }
1842       } else if (nSymOp == 0) {
1843         // Write a P1 cell.
1844         Crystal crystal = activeMolecularAssembly.getCrystal();
1845         if (crystal != null && !crystal.aperiodic()) {
1846           Crystal c = crystal.getUnitCell();
1847           Crystal p1 = new Crystal(c.a, c.b, c.c, c.alpha, c.beta, c.gamma, "P1");
1848           bw.write(p1.toCRYST1());
1849         }
1850       }
1851       // =============================================================================
1852       // The SSBOND record identifies each disulfide bond in protein and polypeptide
1853       // structures by identifying the two residues involved in the bond.
1854       // The disulfide bond distance is included after the symmetry operations at
1855       // the end of the SSBOND record.
1856       //
1857       //  8 - 10        Integer         serNum       Serial number.
1858       // 12 - 14        LString(3)      "CYS"        Residue name.
1859       // 16             Character       chainID1     Chain identifier.
1860       // 18 - 21        Integer         seqNum1      Residue sequence number.
1861       // 22             AChar           icode1       Insertion code.
1862       // 26 - 28        LString(3)      "CYS"        Residue name.
1863       // 30             Character       chainID2     Chain identifier.
1864       // 32 - 35        Integer         seqNum2      Residue sequence number.
1865       // 36             AChar           icode2       Insertion code.
1866       // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
1867       // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
1868       // 74 – 78        Real(5.2)      Length        Disulfide bond distance
1869       //
1870       // If SG of cysteine is disordered then there are possible alternate linkages.
1871       // wwPDB practice is to put together all possible SSBOND records. This is
1872       // problematic because the alternate location identifier is not specified in
1873       // the SSBOND record.
1874       // =============================================================================
1875       int serNum = 1;
1876       Polymer[] polymers = activeMolecularAssembly.getChains();
1877       if (polymers != null) {
1878         for (Polymer polymer : polymers) {
1879           List<Residue> residues = polymer.getResidues();
1880           for (Residue residue : residues) {
1881             if (residue.getName().equalsIgnoreCase("CYS")) {
1882               List<Atom> cysAtoms = residue.getAtomList().stream()
1883                   .filter(a -> !atomExclusions.contains(a)).toList();
1884               Atom SG1 = null;
1885               for (Atom atom : cysAtoms) {
1886                 String atName = atom.getName().toUpperCase();
1887                 if (atName.equals("SG") || atName.equals("SH")
1888                     || atom.getAtomType().atomicNumber == 16) {
1889                   SG1 = atom;
1890                   break;
1891                 }
1892               }
1893               List<Bond> bonds = SG1.getBonds();
1894               for (Bond bond : bonds) {
1895                 Atom SG2 = bond.get1_2(SG1);
1896                 if (SG2.getAtomType().atomicNumber == 16 && !atomExclusions.contains(SG2)) {
1897                   if (SG1.getIndex() < SG2.getIndex()) {
1898                     bond.energy(false);
1899                     bw.write(format("SSBOND %3d CYS %1s %4s    CYS %1s %4s %36s %5.2f\n", serNum++,
1900                         SG1.getChainID().toString(), Hybrid36.encode(4, SG1.getResidueNumber()),
1901                         SG2.getChainID().toString(), Hybrid36.encode(4, SG2.getResidueNumber()), "",
1902                         bond.getValue()));
1903                   }
1904                 }
1905               }
1906             }
1907           }
1908         }
1909       }
1910       // =============================================================================
1911       //
1912       //  7 - 11        Integer       serial       Atom serial number.
1913       // 13 - 16        Atom          name         Atom name.
1914       // 17             Character     altLoc       Alternate location indicator.
1915       // 18 - 20        Residue name  resName      Residue name.
1916       // 22             Character     chainID      Chain identifier.
1917       // 23 - 26        Integer       resSeq       Residue sequence number.
1918       // 27             AChar         iCode        Code for insertion of residues.
1919       // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
1920       // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
1921       // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
1922       // 55 - 60        Real(6.2)     occupancy    Occupancy.
1923       // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
1924       // 77 - 78        LString(2)    element      Element symbol, right-justified.
1925       // 79 - 80        LString(2)    charge       Charge  on the atom.
1926       // =============================================================================
1927       //         1         2         3         4         5         6         7
1928       // 123456789012345678901234567890123456789012345678901234567890123456789012345678
1929       // ATOM      1  N   ILE A  16      60.614  71.140 -10.592  1.00  7.38           N
1930       // ATOM      2  CA  ILE A  16      60.793  72.149  -9.511  1.00  6.91           C
1931       MolecularAssembly[] molecularAssemblies = this.getMolecularAssemblyArray();
1932       int serial = 1;
1933       if (nSymOp > 0) {
1934         serial = serialP1;
1935       }
1936 
1937       // Loop over biomolecular chains
1938       if (polymers != null) {
1939         for (Polymer polymer : polymers) {
1940           currentSegID = polymer.getName();
1941           currentChainID = polymer.getChainID();
1942           sb.setCharAt(21, currentChainID);
1943           // Loop over residues
1944           List<Residue> residues = polymer.getResidues();
1945           for (Residue residue : residues) {
1946             String resName = residue.getName();
1947             if (resName.length() > 3) {
1948               resName = resName.substring(0, 3);
1949             }
1950             int resID = residue.getResidueNumber();
1951             sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
1952             sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
1953             // Loop over atoms
1954             List<Atom> residueAtoms = residue.getAtomList().stream()
1955                 .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
1956             boolean altLocFound = false;
1957             for (Atom atom : residueAtoms) {
1958               writeAtom(atom, serial++, sb, anisouSB, bw);
1959               Character altLoc = atom.getAltLoc();
1960               if (altLoc != null && !altLoc.equals(' ')) {
1961                 altLocFound = true;
1962               }
1963             }
1964             // Write out alternate conformers
1965             if (altLocFound) {
1966               for (int ma = 1; ma < molecularAssemblies.length; ma++) {
1967                 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
1968                 Polymer altPolymer = altMolecularAssembly.getPolymer(currentChainID, currentSegID, false);
1969                 Residue altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
1970                 if (altResidue == null) {
1971                   resName = AminoAcid3.UNK.name();
1972                   altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
1973                 }
1974                 residueAtoms = altResidue.getAtomList().stream()
1975                     .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
1976                 for (Atom atom : residueAtoms) {
1977                   if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
1978                       .equals('A')) {
1979                     sb.replace(17, 20, padLeft(atom.getResidueName().toUpperCase(), 3));
1980                     writeAtom(atom, serial++, sb, anisouSB, bw);
1981                   }
1982                 }
1983               }
1984             }
1985           }
1986           terSB.replace(6, 11, format("%5s", Hybrid36.encode(5, serial++)));
1987           terSB.replace(12, 16, "    ");
1988           terSB.replace(16, 26, sb.substring(16, 26));
1989           bw.write(terSB.toString());
1990           bw.newLine();
1991         }
1992       }
1993       sb.replace(0, 6, "HETATM");
1994       sb.setCharAt(21, 'A');
1995 
1996       Character chainID = 'A';
1997       if (polymers != null) {
1998         chainID = polymers[0].getChainID();
1999       }
2000       activeMolecularAssembly.setChainIDAndRenumberMolecules(chainID);
2001 
2002       // Loop over molecules, ions and then water.
2003       List<MSNode> molecules = activeMolecularAssembly.getMolecules();
2004       int numMolecules = molecules.size();
2005       for (int i = 0; i < numMolecules; i++) {
2006         Molecule molecule = (Molecule) molecules.get(i);
2007         chainID = molecule.getChainID();
2008         sb.setCharAt(21, chainID);
2009         String resName = molecule.getResidueName();
2010         int resID = molecule.getResidueNumber();
2011         if (resName.length() > 3) {
2012           resName = resName.substring(0, 3);
2013         }
2014         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2015         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2016         // List<Atom> moleculeAtoms = molecule.getAtomList();
2017         List<Atom> moleculeAtoms = molecule.getAtomList().stream()
2018             .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2019         boolean altLocFound = false;
2020         for (Atom atom : moleculeAtoms) {
2021           writeAtom(atom, serial++, sb, anisouSB, bw);
2022           Character altLoc = atom.getAltLoc();
2023           if (altLoc != null && !altLoc.equals(' ')) {
2024             altLocFound = true;
2025           }
2026         }
2027         // Write out alternate conformers
2028         if (altLocFound) {
2029           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2030             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2031             MSNode altmolecule = altMolecularAssembly.getMolecules().get(i);
2032             moleculeAtoms = altmolecule.getAtomList();
2033             for (Atom atom : moleculeAtoms) {
2034               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2035                   .equals('A')) {
2036                 writeAtom(atom, serial++, sb, anisouSB, bw);
2037               }
2038             }
2039           }
2040         }
2041       }
2042 
2043       List<MSNode> ions = activeMolecularAssembly.getIons();
2044       for (int i = 0; i < ions.size(); i++) {
2045         Molecule ion = (Molecule) ions.get(i);
2046         chainID = ion.getChainID();
2047         sb.setCharAt(21, chainID);
2048         String resName = ion.getResidueName();
2049         int resID = ion.getResidueNumber();
2050         if (resName.length() > 3) {
2051           resName = resName.substring(0, 3);
2052         }
2053         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2054         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2055         // List<Atom> ionAtoms = ion.getAtomList();
2056         List<Atom> ionAtoms = ion.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2057             .collect(Collectors.toList());
2058         boolean altLocFound = false;
2059         for (Atom atom : ionAtoms) {
2060           writeAtom(atom, serial++, sb, anisouSB, bw);
2061           Character altLoc = atom.getAltLoc();
2062           if (altLoc != null && !altLoc.equals(' ')) {
2063             altLocFound = true;
2064           }
2065         }
2066         // Write out alternate conformers
2067         if (altLocFound) {
2068           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2069             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2070             MSNode altion = altMolecularAssembly.getIons().get(i);
2071             ionAtoms = altion.getAtomList();
2072             for (Atom atom : ionAtoms) {
2073               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2074                   .equals('A')) {
2075                 writeAtom(atom, serial++, sb, anisouSB, bw);
2076               }
2077             }
2078           }
2079         }
2080       }
2081 
2082       List<MSNode> water = activeMolecularAssembly.getWater();
2083       for (int i = 0; i < water.size(); i++) {
2084         Molecule wat = (Molecule) water.get(i);
2085         chainID = wat.getChainID();
2086         sb.setCharAt(21, chainID);
2087         String resName = wat.getResidueName();
2088         int resID = wat.getResidueNumber();
2089         if (resName.length() > 3) {
2090           resName = resName.substring(0, 3);
2091         }
2092         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2093         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2094         List<Atom> waterAtoms = wat.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2095             .collect(Collectors.toList());
2096         boolean altLocFound = false;
2097         for (Atom atom : waterAtoms) {
2098           writeAtom(atom, serial++, sb, anisouSB, bw);
2099           Character altLoc = atom.getAltLoc();
2100           if (altLoc != null && !altLoc.equals(' ')) {
2101             altLocFound = true;
2102           }
2103         }
2104         // Write out alternate conformers
2105         if (altLocFound) {
2106           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2107             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2108             MSNode altwater = altMolecularAssembly.getWater().get(i);
2109             waterAtoms = altwater.getAtomList();
2110             for (Atom atom : waterAtoms) {
2111               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2112                   .equals('A')) {
2113                 writeAtom(atom, serial++, sb, anisouSB, bw);
2114               }
2115             }
2116           }
2117         }
2118       }
2119 
2120       if (model != null) {
2121         bw.write("ENDMDL");
2122         bw.newLine();
2123       }
2124 
2125       if (writeEnd) {
2126         bw.write("END");
2127         bw.newLine();
2128       }
2129 
2130       if (nSymOp >= 0) {
2131         serialP1 = serial;
2132       }
2133 
2134     } catch (Exception e) {
2135       String message = "Exception writing to file: " + saveFile;
2136       logger.log(Level.WARNING, message, e);
2137       return false;
2138     }
2139     return true;
2140   }
2141 
2142   /**
2143    * {@inheritDoc}
2144    *
2145    * <p>Write out the Atomic information in PDB format.
2146    */
2147   @Override
2148   public boolean writeFile(File saveFile, boolean append) {
2149     return writeFile(saveFile, append, false, true);
2150   }
2151 
2152   public boolean writeFile(File saveFile, boolean append, String[] extraLines) {
2153     return writeFile(saveFile, append, Collections.emptySet(), false, !append, extraLines);
2154   }
2155 
2156   /**
2157    * Writes out the atomic information in PDB format.
2158    *
2159    * @param saveFile The file to save information to.
2160    * @param append True if the current data should be appended to the saveFile (as in arc
2161    *     files).
2162    * @param versioning True if the saveFile should be versioned. False if the saveFile should be
2163    *     overwritten.
2164    * @return Success of writing.
2165    */
2166   public boolean writeFile(File saveFile, boolean append, boolean versioning) {
2167     return writeFile(saveFile, append, Collections.emptySet(), true, versioning);
2168   }
2169 
2170   /**
2171    * writeFileWithHeader.
2172    *
2173    * @param saveFile a {@link java.io.File} object.
2174    * @param header a {@link java.lang.String} object.
2175    * @param append a boolean.
2176    * @return a boolean.
2177    */
2178   public boolean writeFileWithHeader(File saveFile, String header, boolean append) {
2179     if (standardizeAtomNames) {
2180       logger.info(" Setting atom names to PDB standard.");
2181       renameAtomsToPDBStandard(activeMolecularAssembly);
2182     }
2183     activeMolecularAssembly.setFile(saveFile);
2184     activeMolecularAssembly.setName(saveFile.getName());
2185 
2186     try (FileWriter fw = new FileWriter(saveFile, append); BufferedWriter bw = new BufferedWriter(
2187         fw)) {
2188       bw.write(header);
2189       bw.newLine();
2190     } catch (Exception e) {
2191       String message = " Exception writing to file: " + saveFile;
2192       logger.log(Level.WARNING, message, e);
2193       return false;
2194     }
2195     if (writeFile(saveFile, true)) {
2196       logger.log(Level.INFO, " Wrote PDB to file {0}", saveFile.getPath());
2197       return true;
2198     } else {
2199       logger.log(Level.INFO, " Error writing to file {0}", saveFile.getPath());
2200       return false;
2201     }
2202   }
2203 
2204   /**
2205    * writeFileWithHeader.
2206    *
2207    * @param saveFile a {@link java.io.File} object.
2208    * @param header a {@link java.lang.String} object.
2209    * @return a boolean.
2210    */
2211   public boolean writeFileWithHeader(File saveFile, String header) {
2212     return writeFileWithHeader(saveFile, header, true);
2213   }
2214 
2215   /**
2216    * writeFileWithHeader.
2217    *
2218    * @param saveFile a {@link java.io.File} object.
2219    * @param header a {@link java.lang.StringBuilder} object.
2220    * @return a boolean.
2221    */
2222   public boolean writeFileWithHeader(File saveFile, StringBuilder header) {
2223     return writeFileWithHeader(saveFile, header.toString());
2224   }
2225 
2226   /**
2227    * Get unique SegID for possibly duplicate chain IDs.
2228    *
2229    * @param c chain ID just read.
2230    * @return a unique segID.
2231    */
2232   private String getExistingSegID(Character c) {
2233     if (c.equals(' ')) {
2234       c = 'A';
2235     }
2236 
2237     List<String> segIDs = segidMap.get(c);
2238     if (segIDs != null) {
2239       String segID = segIDs.get(0);
2240       if (segIDs.size() > 1) {
2241         logger.log(Level.INFO, format(" Multiple SegIDs for to chain %s; using %s.", c, segID));
2242       }
2243       return segID;
2244     } else {
2245       logger.log(Level.INFO, format(" Creating SegID for to chain %s", c));
2246       return getSegID(c);
2247     }
2248   }
2249 
2250   /**
2251    * Convert possibly duplicate chain IDs into unique segIDs.
2252    *
2253    * @param c chain ID just read.
2254    * @return a unique segID.
2255    */
2256   private String getSegID(Character c) {
2257     if (c.equals(' ')) {
2258       c = 'A';
2259     }
2260 
2261     // If the chain ID has not changed, return the existing segID.
2262     if (c.equals(currentChainID)) {
2263       return currentSegID;
2264     }
2265 
2266     // Loop through existing segIDs to find the first one that is unused.
2267     int count = 0;
2268     for (String segID : segIDs) {
2269       if (segID.endsWith(c.toString())) {
2270         count++;
2271       }
2272     }
2273 
2274     // If the count is greater than 0, then append it.
2275     String newSegID;
2276     if (count == 0) {
2277       newSegID = c.toString();
2278     } else {
2279       newSegID = count + c.toString();
2280     }
2281     segIDs.add(newSegID);
2282     currentChainID = c;
2283     currentSegID = newSegID;
2284 
2285     if (segidMap.containsKey(c)) {
2286       segidMap.get(c).add(newSegID);
2287     } else {
2288       List<String> newChainList = new ArrayList<>();
2289       newChainList.add(newSegID);
2290       segidMap.put(c, newChainList);
2291     }
2292 
2293     return newSegID;
2294   }
2295 
2296   /**
2297    * writeAtom
2298    *
2299    * @param atom a {@link ffx.potential.bonded.Atom} object.
2300    * @param serial a int.
2301    * @param sb a {@link java.lang.StringBuilder} object.
2302    * @param anisouSB a {@link java.lang.StringBuilder} object.
2303    * @param bw a {@link java.io.BufferedWriter} object.
2304    * @throws java.io.IOException if any.
2305    */
2306   private void writeAtom(Atom atom, int serial, StringBuilder sb, StringBuilder anisouSB,
2307       BufferedWriter bw) throws IOException {
2308     String name = atom.getName();
2309     if (name.length() > 4) {
2310       name = name.substring(0, 4);
2311     } else if (name.length() == 1) {
2312       name = name + "  ";
2313     } else if (name.length() == 2) {
2314       if (atom.getAtomType().valence == 0) {
2315         name = name + "  ";
2316       } else {
2317         name = name + " ";
2318       }
2319     }
2320     double[] xyz = vdwH ? atom.getRedXYZ() : atom.getXYZ(null);
2321     if (nSymOp >= 0) {
2322       Crystal crystal = activeMolecularAssembly.getCrystal().getUnitCell();
2323       SymOp symOp = crystal.spaceGroup.getSymOp(nSymOp);
2324       double[] newXYZ = new double[xyz.length];
2325       crystal.applySymOp(xyz, newXYZ, symOp);
2326       xyz = newXYZ;
2327     }
2328     sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
2329     Character altLoc = atom.getAltLoc();
2330     sb.setCharAt(16, Objects.requireNonNullElse(altLoc, ' '));
2331 
2332 
2333     /*
2334      * On the following code:
2335      * #1: StringBuilder.replace will allow for longer strings, expanding the StringBuilder's length if necessary.
2336      *
2337      * #2: sb was never re-initialized, so if there was overflow,
2338      * sb would continue to be > 80 characters long, resulting in broken PDB files
2339      *
2340      * #3: It may be wiser to have XYZ coordinates result in shutdown, not
2341      * truncation of coordinates. #4: Excessive B-factors aren't much of an
2342      * issue; if the B-factor is past 999.99, that's the difference between
2343      * "density extends to Venus" and "density extends to Pluto".
2344      */
2345     StringBuilder decimals = new StringBuilder();
2346     for (int i = 0; i < 3; i++) {
2347       try {
2348         decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
2349       } catch (IllegalArgumentException ex) {
2350         String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
2351         logger.info(format(" XYZ %d coordinate %8.3f for atom %s "
2352             + "overflowed bounds of 8.3f string specified by PDB "
2353             + "format; truncating value to %s", i, xyz[i], atom, newValue));
2354         decimals.append(newValue);
2355       }
2356     }
2357     try {
2358       decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
2359     } catch (IllegalArgumentException ex) {
2360       logger.severe(
2361           format(" Occupancy %f for atom %s is impossible; " + "value must be between 0 and 1",
2362               atom.getOccupancy(), atom));
2363     }
2364     try {
2365       decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
2366     } catch (IllegalArgumentException ex) {
2367       String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
2368       logger.info(format(" Atom temp factor %6.2f for atom %s overflowed "
2369               + "bounds of 6.2f string specified by PDB format; truncating " + "value to %s",
2370           atom.getTempFactor(), atom, newValue));
2371       decimals.append(newValue);
2372     }
2373     sb.replace(30, 66, decimals.toString());
2374 
2375     name = Atom.ElementSymbol.values()[atom.getAtomicNumber() - 1].toString();
2376     name = name.toUpperCase();
2377     if (atom.isDeuterium()) {
2378       name = "D";
2379     }
2380     sb.replace(76, 78, padLeft(name, 2));
2381     sb.replace(78, 80, format("%2d", 0));
2382     bw.write(sb.toString());
2383     bw.newLine();
2384     // =============================================================================
2385     //  1 - 6        Record name   "ANISOU"
2386     //  7 - 11       Integer       serial         Atom serial number.
2387     // 13 - 16       Atom          name           Atom name.
2388     // 17            Character     altLoc         Alternate location indicator
2389     // 18 - 20       Residue name  resName        Residue name.
2390     // 22            Character     chainID        Chain identifier.
2391     // 23 - 26       Integer       resSeq         Residue sequence number.
2392     // 27            AChar         iCode          Insertion code.
2393     // 29 - 35       Integer       u[0][0]        U(1,1)
2394     // 36 - 42       Integer       u[1][1]        U(2,2)
2395     // 43 - 49       Integer       u[2][2]        U(3,3)
2396     // 50 - 56       Integer       u[0][1]        U(1,2)
2397     // 57 - 63       Integer       u[0][2]        U(1,3)
2398     // 64 - 70       Integer       u[1][2]        U(2,3)
2399     // 77 - 78       LString(2)    element        Element symbol, right-justified.
2400     // 79 - 80       LString(2)    charge         Charge on the atom.
2401     // =============================================================================
2402     double[] anisou = atom.getAnisou(null);
2403     if (anisou != null) {
2404       anisouSB.replace(6, 80, sb.substring(6, 80));
2405       anisouSB.replace(28, 70,
2406           format("%7d%7d%7d%7d%7d%7d", (int) (anisou[0] * 1e4), (int) (anisou[1] * 1e4),
2407               (int) (anisou[2] * 1e4), (int) (anisou[3] * 1e4), (int) (anisou[4] * 1e4),
2408               (int) (anisou[5] * 1e4)));
2409       bw.write(anisouSB.toString());
2410       bw.newLine();
2411     }
2412   }
2413 
2414   /** PDB records that are recognized. */
2415   private enum Record {
2416     ANISOU, ATOM, CONECT, CRYST1, DBREF, END, MODEL, ENDMDL, HELIX, HETATM, LINK, MTRIX1, MTRIX2, MTRIX3, MODRES, SEQRES, SHEET, SSBOND, REMARK
2417   }
2418 
2419   /** Presently, VERSION3_3 is default, and VERSION3_2 is anything non-standard. */
2420   public enum PDBFileStandard {
2421     VERSION2_3, VERSION3_0, VERSION3_1, VERSION3_2, VERSION3_3
2422   }
2423 
2424   public static class Mutation {
2425 
2426     /** Residue ID of the residue to mutate. */
2427     final int resID;
2428     /** Residue name after mutation. */
2429     final String resName;
2430     /** Character for the chain ID of the residue that will be mutated. */
2431     final char chainChar;
2432 
2433     public Mutation(int resID, char chainChar, String newResName) {
2434       newResName = newResName.toUpperCase();
2435       if (newResName.length() != 3) {
2436         logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2437       }
2438       try {
2439         AminoAcidUtils.AminoAcid3.valueOf(newResName);
2440       } catch (IllegalArgumentException ex) {
2441         logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2442       }
2443       this.resID = resID;
2444       this.chainChar = chainChar;
2445       this.resName = newResName;
2446     }
2447   }
2448 }