View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.parsers;
39  
40  import ffx.crystal.Crystal;
41  import ffx.crystal.SpaceGroup;
42  import ffx.crystal.SpaceGroupDefinitions;
43  import ffx.crystal.SpaceGroupInfo;
44  import ffx.crystal.SymOp;
45  import ffx.numerics.math.DoubleMath;
46  import ffx.potential.MolecularAssembly;
47  import ffx.potential.Utilities.FileType;
48  import ffx.potential.bonded.AminoAcidUtils;
49  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
50  import ffx.potential.bonded.Atom;
51  import ffx.potential.bonded.Bond;
52  import ffx.potential.bonded.MSNode;
53  import ffx.potential.bonded.Molecule;
54  import ffx.potential.bonded.NucleicAcidUtils;
55  import ffx.potential.bonded.Polymer;
56  import ffx.potential.bonded.Residue;
57  import ffx.potential.parameters.ForceField;
58  import ffx.utilities.Hybrid36;
59  import ffx.utilities.StringUtils;
60  import org.apache.commons.configuration2.CompositeConfiguration;
61  
62  import java.io.BufferedReader;
63  import java.io.BufferedWriter;
64  import java.io.File;
65  import java.io.FileNotFoundException;
66  import java.io.FileReader;
67  import java.io.FileWriter;
68  import java.io.IOException;
69  import java.util.ArrayList;
70  import java.util.Collections;
71  import java.util.HashMap;
72  import java.util.List;
73  import java.util.Map;
74  import java.util.Objects;
75  import java.util.OptionalDouble;
76  import java.util.Set;
77  import java.util.logging.Level;
78  import java.util.logging.Logger;
79  import java.util.regex.Matcher;
80  import java.util.regex.Pattern;
81  import java.util.stream.Collectors;
82  
83  import static ffx.potential.bonded.BondedUtils.numberAtoms;
84  import static ffx.potential.bonded.NamingUtils.renameAtomsToPDBStandard;
85  import static ffx.potential.bonded.PolymerUtils.assignAtomTypes;
86  import static ffx.potential.bonded.PolymerUtils.buildDisulfideBonds;
87  import static ffx.potential.bonded.PolymerUtils.buildMissingResidues;
88  import static ffx.potential.bonded.PolymerUtils.locateDisulfideBonds;
89  import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_2;
90  import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_3;
91  import static ffx.utilities.StringUtils.padLeft;
92  import static java.lang.Double.parseDouble;
93  import static java.lang.Integer.parseInt;
94  import static java.lang.String.format;
95  import static org.apache.commons.lang3.StringUtils.repeat;
96  import static org.apache.commons.math3.util.FastMath.min;
97  import static org.apache.commons.math3.util.FastMath.toDegrees;
98  
99  /**
100  * The PDBFilter class parses data from a Protein DataBank (*.PDB) file. The following records are
101  * recognized: ANISOU, ATOM, CONECT, CRYST1, END, HELIX, HETATM, LINK, SHEET, SSBOND, REMARK. The
102  * rest are currently ignored.
103  *
104  * @author Michael J. Schnieders
105  * @see <a href="http://www.wwpdb.org/documentation/format32/v3.2.html">PDB format 3.2</a>
106  * @since 1.0
107  */
108 public final class PDBFilter extends SystemFilter {
109 
110   private static final Logger logger = Logger.getLogger(PDBFilter.class.getName());
111   private static final Set<String> backboneNames;
112   private static final Set<String> constantPhBackboneNames;
113   private static final Set<String> naBackboneNames;
114 
115   static {
116     String[] names = {"C", "CA", "N", "O", "OXT", "OT2"};
117     backboneNames = Set.of(names);
118 
119     String[] constantPhNames = {"C", "CA", "N", "O", "OXT", "OT2", "H", "HA", "H1", "H2", "H3"};
120     constantPhBackboneNames = Set.of(constantPhNames);
121 
122     String[] naNames = {"P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'"};
123     naBackboneNames = Set.of(naNames);
124   }
125 
126   /** Map of SEQRES entries. */
127   private final Map<Character, String[]> seqRes = new HashMap<>();
128   /** Map of DBREF entries. */
129   private final Map<Character, int[]> dbRef = new HashMap<>();
130   /** List of altLoc characters seen in the PDB file. */
131   private final List<Character> altLocs = new ArrayList<>();
132   /**
133    * List of segIDs defined for the PDB file.
134    *
135    * <p>The expectation is for chain naming from A-Z, then from 0-9. For large systems, chain names
136    * are sometimes reused due to limitations in the PDB format.
137    *
138    * <p>However, we define segIDs to always be unique. For the first A-Z,0-9 series chainID ==
139    * segID. Then, for second A-Z,0-9 series, the segID = 1A-1Z,10-19, and for the third series segID
140    * = 2A-2Z,20-29, and so on.
141    */
142   private final List<String> segIDs = new ArrayList<>();
143 
144   private final Map<Character, List<String>> segidMap = new HashMap<>();
145   /** Maps a chain to the number of insertion codes encountered in that chain. */
146   private final Map<Character, Integer> insertionCodeCount = new HashMap<>();
147   /**
148    * Maps chainIDResNumInsCode to renumbered chainIDResNum. For example, residue 52A in chain C might
149    * be renumbered to residue 53, and mapped as "C52A" to "C53".
150    */
151   private final Map<String, String> pdbToNewResMap = new HashMap<>();
152   /** List of modified residues * */
153   private final Map<String, String> modRes = new HashMap<>();
154   /** Keep track of ATOM record serial numbers to match them with ANISOU records. */
155   private final HashMap<Integer, Atom> atoms = new HashMap<>();
156 
157   private final Map<MolecularAssembly, BufferedReader> readers = new HashMap<>();
158   /** The current altLoc - i.e., the one we are defining a chemical system for. */
159   private Character currentAltLoc = 'A';
160   /** Character for the current chain ID. */
161   private Character currentChainID = null;
162   /** String for the current SegID. */
163   private String currentSegID = null;
164   /** Flag to indicate a mutation is requested. */
165   private boolean mutate = false;
166   private List<Mutation> mutations = null;
167   private List<Integer> resNumberList = null;
168   /** Flag to indicate if missing fields should be printed (i.e. missing B-factors). */
169   private boolean printMissingFields = true;
170   /** Number of symmetry operators when expanding to a P1 unit cell (-1 saves as current spacegroup). */
171   private int nSymOp = -1;
172   /** Number of replicates in A lattice direction (-1 defaults to unit cell). */
173   private int lValue = -1;
174   /** Number of replicates in B lattice direction (-1 defaults to unit cell). */
175   private int mValue = -1;
176   /** Number of replicates in C lattice direction (-1 defaults to unit cell). */
177   private int nValue = -1;
178   /**
179    * The serial field continues from the previous asymmetric unit when expanding to P1. This is not
180    * used when saving as the current spacegroup.
181    */
182   private int serialP1 = 0;
183   /** Assume current standard. */
184   private PDBFileStandard fileStandard = VERSION3_3;
185   /** If false, skip logging "Saving file". */
186   private boolean logWrites = true;
187   /** Keep track of the current MODEL in the file. */
188   private int modelsRead = 1;
189   /** Tracks output MODEL numbers. Unused if below zero. */
190   private int modelsWritten = -1;
191   /** Replicates vector dimensions if saving as expanded. */
192   private int[] lmn = new int[]{1,1,1};
193   private String versionFileName;
194 
195   private final File readFile;
196   private List<String> remarkLines = Collections.emptyList();
197   private double lastReadLambda = Double.NaN;
198 
199   /**
200    * If true, read in titratable residues in their fully protonated form.
201    */
202   private boolean constantPH = false;
203   /**
204    * If true, read in titratable residues in their protonated form.
205    */
206   private boolean rotamerTitration = false;
207   /**
208    * List of residue to rename for constantPH simulations.
209    */
210   private static final HashMap<AminoAcid3, AminoAcid3> constantPHResidueMap = new HashMap<>();
211 
212   static {
213     // Lysine
214     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
215     // Cysteine
216     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
217     // Histidine
218     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
219     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
220     // Aspartate
221     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASD);
222     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASH, AminoAcidUtils.AminoAcid3.ASD);
223     // Glutamate
224     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLD);
225     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLH, AminoAcidUtils.AminoAcid3.GLD);
226   }
227 
228   /**
229    * List of residue to rename for rotamer titration simulations.
230    */
231   private static final HashMap<AminoAcid3, AminoAcid3> rotamerResidueMap = new HashMap<>();
232 
233   static {
234     // Lysine
235     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
236     // Histidine
237     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
238     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
239     // Aspartate
240     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASH);
241     // Glutamate
242     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLH);
243     //Cysteine
244     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
245   }
246 
247   /**
248    * Constructor for PDBFilter.
249    *
250    * @param files a {@link java.util.List} object.
251    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
252    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
253    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
254    *     object.
255    */
256   public PDBFilter(List<File> files, MolecularAssembly molecularAssembly, ForceField forceField,
257                    CompositeConfiguration properties) {
258     super(files, molecularAssembly, forceField, properties);
259     bondList = new ArrayList<>();
260     this.fileType = FileType.PDB;
261     readFile = files.get(0);
262   }
263 
264   /**
265    * Parse the PDB File from a URL.
266    *
267    * @param file a {@link java.io.File} object.
268    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
269    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
270    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
271    *     object.
272    */
273   public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
274                    CompositeConfiguration properties) {
275     super(file, molecularAssembly, forceField, properties);
276     bondList = new ArrayList<>();
277     this.fileType = FileType.PDB;
278     readFile = file;
279   }
280 
281   /**
282    * Parse the PDB File from a URL.
283    *
284    * @param file a {@link java.io.File} object.
285    * @param molecularAssemblies a {@link java.util.List} object.
286    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
287    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
288    *     object.
289    */
290   public PDBFilter(File file, List<MolecularAssembly> molecularAssemblies, ForceField forceField,
291                    CompositeConfiguration properties) {
292     super(file, molecularAssemblies, forceField, properties);
293     bondList = new ArrayList<>();
294     this.fileType = FileType.PDB;
295     readFile = file;
296   }
297 
298   /**
299    * Constructor for PDBFilter with residue numbers.
300    *
301    * @param file a {@link java.util.List} object.
302    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
303    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
304    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
305    *     object.
306    * @param resNumberList a List of integer residue numbers for constant pH rotamer
307    *     optimization.
308    */
309   public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
310                    CompositeConfiguration properties, List<Integer> resNumberList) {
311     super(file, molecularAssembly, forceField, properties);
312     bondList = new ArrayList<>();
313     this.fileType = FileType.PDB;
314     this.readFile = file;
315     this.resNumberList = resNumberList;
316     //this.chainList = chainList;
317   }
318 
319 
320   /**
321    * Simple method useful for converting files to PDB format.
322    *
323    * @param atom a {@link ffx.potential.bonded.Atom} object.
324    * @return Returns a PDB ATOM or HETATM record for the passed Atom.
325    */
326   public static String toPDBAtomLine(Atom atom) {
327     StringBuilder sb;
328     if (atom.isHetero()) {
329       sb = new StringBuilder("HETATM");
330     } else {
331       sb = new StringBuilder("ATOM  ");
332     }
333     sb.append(repeat(" ", 74));
334 
335     String name = atom.getName();
336     int nameLength = name.length();
337     if (nameLength > 4) {
338       name = name.substring(0, 4);
339     } else if (nameLength == 1) {
340       name = name + "  ";
341     } else if (nameLength == 2) {
342       name = name + " ";
343     }
344     int serial = atom.getXyzIndex();
345     sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
346 
347     Character altLoc = atom.getAltLoc();
348     if (altLoc != null) {
349       sb.setCharAt(16, altLoc);
350     } else {
351       char blankChar = ' ';
352       sb.setCharAt(16, blankChar);
353     }
354 
355     String resName = atom.getResidueName();
356     sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
357 
358     char chain = atom.getChainID();
359     sb.setCharAt(21, chain);
360 
361     int resID = atom.getResidueNumber();
362     sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
363 
364     double[] xyz = atom.getXYZ(null);
365     StringBuilder decimals = new StringBuilder();
366     for (int i = 0; i < 3; i++) {
367       try {
368         decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
369       } catch (IllegalArgumentException ex) {
370         String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
371         logger.info(
372                 format(" XYZ coordinate %8.3f for atom %s overflowed PDB format and is truncated to %s.",
373                         xyz[i], atom, newValue));
374         decimals.append(newValue);
375       }
376     }
377     try {
378       decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
379     } catch (IllegalArgumentException ex) {
380       logger.severe(
381               format(" Occupancy %6.2f for atom %s must be between 0 and 1.", atom.getOccupancy(),
382                       atom));
383     }
384     try {
385       decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
386     } catch (IllegalArgumentException ex) {
387       String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
388       logger.info(
389               format(" B-factor %6.2f for atom %s overflowed the PDB format and is truncated to %s.",
390                       atom.getTempFactor(), atom, newValue));
391       decimals.append(newValue);
392     }
393 
394     sb.replace(30, 66, decimals.toString());
395     sb.replace(78, 80, format("%2d", 0));
396     sb.append("\n");
397     return sb.toString();
398   }
399 
400   public void setConstantPH(boolean constantPH) {
401     this.constantPH = constantPH;
402   }
403 
404   public void setRotamerTitration(boolean rotamerTitration) {
405     this.rotamerTitration = rotamerTitration;
406   }
407 
408   /** clearSegIDs */
409   public void clearSegIDs() {
410     segIDs.clear();
411   }
412 
413   /** {@inheritDoc} */
414   @Override
415   public void closeReader() {
416     for (MolecularAssembly system : systems) {
417       BufferedReader br = readers.get(system);
418       if (br != null) {
419         try {
420           br.close();
421         } catch (IOException ex) {
422           logger.warning(format(" Exception in closing system %s: %s", system.toString(), ex));
423         }
424       }
425     }
426   }
427 
428   @Override
429   public int countNumModels() {
430     Set<File> files = systems.stream().map(MolecularAssembly::getFile).map(File::toString).distinct()
431             .map(File::new).collect(Collectors.toSet());
432 
433     // Dangers of parallelism are minimized by: unique files/filenames, read-only access.
434     return files.parallelStream().mapToInt((File fi) -> {
435       int nModelsLocal = 0;
436       try (BufferedReader br = new BufferedReader(new FileReader(fi))) {
437         String line = br.readLine();
438         while (line != null) {
439           if (line.startsWith("MODEL")) {
440             ++nModelsLocal;
441           }
442           line = br.readLine();
443         }
444         nModelsLocal = Math.max(1, nModelsLocal);
445       } catch (IOException ex) {
446         logger.info(format(" Exception in parsing file %s: %s", fi, ex));
447       }
448       return nModelsLocal;
449     }).sum();
450   }
451 
452   /**
453    * Get the list of alternate locations encountered.
454    *
455    * @return the alternate location list.
456    */
457   public List<Character> getAltLocs() {
458     return altLocs;
459   }
460 
461   /** {@inheritDoc} */
462   @Override
463   public OptionalDouble getLastReadLambda() {
464     return Double.isNaN(lastReadLambda) ? OptionalDouble.empty() : OptionalDouble.of(lastReadLambda);
465   }
466 
467   /**
468    * Returns all the remark lines found by the last readFile call.
469    *
470    * @return Remark lines from the last readFile call.
471    */
472   @Override
473   public String[] getRemarkLines() {
474     int nRemarks = remarkLines.size();
475     return remarkLines.toArray(new String[nRemarks]);
476   }
477 
478   @Override
479   public int getSnapshot() {
480     return modelsRead;
481   }
482 
483   /**
484    * Mutate residue(s) as the PDB file is being parsed.
485    *
486    * @param mutations a {@link java.util.List} object.
487    */
488   public void mutate(List<Mutation> mutations) {
489     mutate = true;
490     if (this.mutations == null) {
491       this.mutations = new ArrayList<>();
492     }
493     this.mutations.addAll(mutations);
494   }
495 
496   /** Parse the PDB File */
497   @Override
498   public boolean readFile() {
499     remarkLines = new ArrayList<>();
500     // First atom is #1, to match xyz file format
501     int xyzIndex = 1;
502     setFileRead(false);
503     systems.add(activeMolecularAssembly);
504 
505     List<String> conects = new ArrayList<>();
506     List<String> links = new ArrayList<>();
507     List<String> ssbonds = new ArrayList<>();
508     List<String> structs = new ArrayList<>();
509     try {
510       for (File file : files) {
511         currentFile = file;
512         if (mutate) {
513           List<Character> chainIDs = new ArrayList<>();
514           try (BufferedReader br = new BufferedReader(new FileReader(file))) {
515             String line = br.readLine();
516             while (line != null) {
517               // Replace all tabs w/ 4x spaces
518               line = line.replaceAll("\t", "    ");
519               String identity = line;
520               if (line.length() > 6) {
521                 identity = line.substring(0, 6);
522               }
523               identity = identity.trim().toUpperCase();
524               Record record;
525               try {
526                 record = Record.valueOf(identity);
527               } catch (Exception e) {
528                 // Continue until the record is recognized.
529                 line = br.readLine();
530                 continue;
531               }
532               switch (record) {
533                 case ANISOU, HETATM, ATOM -> {
534                   char c22 = line.charAt(21);
535                   boolean idFound = false;
536                   for (Character chainID : chainIDs) {
537                     if (c22 == chainID) {
538                       idFound = true;
539                       break;
540                     }
541                   }
542                   if (!idFound) {
543                     chainIDs.add(c22);
544                   }
545                 }
546               }
547               line = br.readLine();
548             }
549             for (Mutation mtn : mutations) {
550               if (!chainIDs.contains(mtn.chainChar)) {
551                 if (chainIDs.size() == 1) {
552                   logger.warning(
553                           format(" Chain ID %c for mutation not found: only one chain %c found.",
554                                   mtn.chainChar, chainIDs.get(0)));
555                 } else {
556                   logger.warning(
557                           format(" Chain ID %c for mutation not found: mutation will not proceed.",
558                                   mtn.chainChar));
559                 }
560               }
561             }
562           } catch (IOException ioException) {
563             logger.fine(format(" Exception %s in parsing file to find chain IDs", ioException));
564           }
565         }
566 
567         // Check that the current file exists and that we can read it.
568         if (currentFile == null || !currentFile.exists() || !currentFile.canRead()) {
569           return false;
570         }
571 
572         // Open the current file for parsing.
573         try (BufferedReader br = new BufferedReader(new FileReader(currentFile))) {
574           // Echo the alternate location being parsed.
575           if (currentAltLoc == 'A') {
576             logger.info(format(" Reading %s", currentFile.getName()));
577           } else {
578             logger.info(format(" Reading %s alternate location %s", currentFile.getName(), currentAltLoc));
579 
580           }
581           activeMolecularAssembly.setAlternateLocation(currentAltLoc);
582 
583           // Reset the current chain and segID.
584           currentChainID = null;
585           currentSegID = null;
586           boolean containsInsCode = false;
587 
588           // Read the first line of the file.
589           String line = br.readLine();
590 
591           // Parse until END or ENDMDL is found, or to the end of the file.
592           while (line != null) {
593             // Replace all tabs w/ 4x spaces
594             line = line.replaceAll("\t", "    ");
595             String identity = line;
596             if (line.length() > 6) {
597               identity = line.substring(0, 6);
598             }
599             identity = identity.trim().toUpperCase();
600             Record record;
601             try {
602               record = Record.valueOf(identity);
603             } catch (Exception e) {
604               // Continue until the record is recognized.
605               line = br.readLine();
606               continue;
607             }
608 
609             // Switch on the known record.
610             switch (record) {
611               case ENDMDL:
612               case END:
613                 // Setting "line" to null will exit the loop.
614                 line = null;
615                 continue;
616               case DBREF:
617 // =============================================================================
618 //  1 -  6       Record name   "DBREF "
619 //  8 - 11       IDcode        idCode             ID code of this entry.
620 // 13            Character     chainID            Chain identifier.
621 // 15 - 18       Integer       seqBegin           Initial sequence number of the
622 //                                                PDB sequence segment.
623 // 19            AChar         insertBegin        Initial insertion code of the
624 //                                                PDB sequence segment.
625 // 21 - 24       Integer       seqEnd             Ending sequence number of the
626 //                                                PDB sequence segment.
627 // 25            AChar         insertEnd          Ending insertion code of the
628 //                                                PDB sequence segment.
629 // 27 - 32       LString       database           Sequence database name.
630 // 34 - 41       LString       dbAccession        Sequence database accession code.
631 // 43 - 54       LString       dbIdCode           Sequence  database identification code.
632 // 56 - 60       Integer       dbseqBegin         Initial sequence number of the
633 //                                                database seqment.
634 // 61            AChar         idbnsBeg           Insertion code of initial residue of the
635 //                                                segment, if PDB is the reference.
636 // 63 - 67       Integer       dbseqEnd           Ending sequence number of the
637 //                                                database segment.
638 // 68            AChar         dbinsEnd           Insertion code of the ending residue of
639 //                                                the segment, if PDB is the reference.
640 // =============================================================================
641                 Character chainID = line.substring(12, 13).toUpperCase().charAt(0);
642                 int seqBegin = parseInt(line.substring(14, 18).trim());
643                 int seqEnd = parseInt(line.substring(20, 24).trim());
644                 int[] seqRange = dbRef.computeIfAbsent(chainID, k -> new int[2]);
645                 seqRange[0] = seqBegin;
646                 seqRange[1] = seqEnd;
647                 break;
648               case SEQRES:
649 // =============================================================================
650 //  1 -  6        Record name    "SEQRES"
651 //  8 - 10        Integer        serNum       Serial number of the SEQRES record for the
652 //                                            current  chain. Starts at 1 and increments
653 //                                            by one  each line. Reset to 1 for each chain.
654 // 12             Character      chainID      Chain identifier. This may be any single
655 //                                            legal  character, including a blank which is
656 //                                            is used if there is only one chain.
657 // 14 - 17        Integer        numRes       Number of residues in the chain.
658 //                                            This  value is repeated on every record.
659 // 20 - 22        Residue name   resName      Residue name.
660 // 24 - 26        Residue name   resName      Residue name.
661 // 28 - 30        Residue name   resName      Residue name.
662 // 32 - 34        Residue name   resName      Residue name.
663 // 36 - 38        Residue name   resName      Residue name.
664 // 40 - 42        Residue name   resName      Residue name.
665 // 44 - 46        Residue name   resName      Residue name.
666 // 48 - 50        Residue name   resName      Residue name.
667 // 52 - 54        Residue name   resName      Residue name.
668 // 56 - 58        Residue name   resName      Residue name.
669 // 60 - 62        Residue name   resName      Residue name.
670 // 64 - 66        Residue name   resName      Residue name.
671 // 68 - 70        Residue name   resName      Residue name.
672 // =============================================================================
673                 activeMolecularAssembly.addHeaderLine(line);
674                 chainID = line.substring(11, 12).toUpperCase().charAt(0);
675                 int serNum = parseInt(line.substring(7, 10).trim());
676                 String[] chain = seqRes.get(chainID);
677                 int numRes = parseInt(line.substring(13, 17).trim());
678                 if (chain == null) {
679                   chain = new String[numRes];
680                   seqRes.put(chainID, chain);
681                 }
682                 int resID = (serNum - 1) * 13;
683                 int end = line.length();
684                 for (int start = 19; start + 3 <= end; start += 4) {
685                   String res = line.substring(start, start + 3).trim();
686                   if (res.isEmpty()) {
687                     break;
688                   }
689                   chain[resID++] = res;
690                 }
691                 break;
692               case MODRES:
693                 String modResName = line.substring(12, 15).trim();
694                 String stdName = line.substring(24, 27).trim();
695                 modRes.put(modResName.toUpperCase(), stdName.toUpperCase());
696                 activeMolecularAssembly.addHeaderLine(line);
697 // =============================================================================
698 //  1 -  6        Record name     "MODRES"
699 //  8 - 11        IDcode          idCode         ID code of this entry.
700 // 13 - 15        Residue name    resName        Residue name used in this entry.
701 // 17             Character       chainID        Chain identifier.
702 // 19 - 22        Integer         seqNum         Sequence number.
703 // 23             AChar           iCode          Insertion code.
704 // 25 - 27        Residue name    stdRes         Standard residue name.
705 // 30 - 70        String          comment        Description of the residue modification.
706 // =============================================================================
707                 break;
708               case ANISOU:
709 // =============================================================================
710 //  1 - 6        Record name   "ANISOU"
711 //  7 - 11       Integer       serial         Atom serial number.
712 // 13 - 16       Atom          name           Atom name.
713 // 17            Character     altLoc         Alternate location indicator
714 // 18 - 20       Residue name  resName        Residue name.
715 // 22            Character     chainID        Chain identifier.
716 // 23 - 26       Integer       resSeq         Residue sequence number.
717 // 27            AChar         iCode          Insertion code.
718 // 29 - 35       Integer       u[0][0]        U(1,1)
719 // 36 - 42       Integer       u[1][1]        U(2,2)
720 // 43 - 49       Integer       u[2][2]        U(3,3)
721 // 50 - 56       Integer       u[0][1]        U(1,2)
722 // 57 - 63       Integer       u[0][2]        U(1,3)
723 // 64 - 70       Integer       u[1][2]        U(2,3)
724 // 77 - 78       LString(2)    element        Element symbol, right-justified.
725 // 79 - 80       LString(2)    charge         Charge on the atom.
726 // =============================================================================
727                 boolean deleteAnisou = properties.getBoolean("delete-anisou", false);
728                 double resetBfactors = properties.getDouble("reset-bfactors", -1.0);
729                 if (deleteAnisou || resetBfactors >= 0.0) {
730                   break;
731                 }
732                 Integer serial = Hybrid36.decode(5, line.substring(6, 11));
733                 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
734                 if (!altLocs.contains(altLoc)) {
735                   altLocs.add(altLoc);
736                 }
737                 if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
738                   break;
739                 }
740                 double[] adp = new double[6];
741                 adp[0] = parseInt(line.substring(28, 35).trim()) * 1.0e-4;
742                 adp[1] = parseInt(line.substring(35, 42).trim()) * 1.0e-4;
743                 adp[2] = parseInt(line.substring(42, 49).trim()) * 1.0e-4;
744                 adp[3] = parseInt(line.substring(49, 56).trim()) * 1.0e-4;
745                 adp[4] = parseInt(line.substring(56, 63).trim()) * 1.0e-4;
746                 adp[5] = parseInt(line.substring(63, 70).trim()) * 1.0e-4;
747                 if (atoms.containsKey(serial)) {
748                   Atom a = atoms.get(serial);
749                   a.setAltLoc(altLoc);
750                   a.setAnisou(adp);
751                 } else {
752                   logger.info(
753                           format(" No ATOM record for ANISOU serial number %d has been found.", serial));
754                   logger.info(format(" This ANISOU record will be ignored:\n %s", line));
755                 }
756                 break;
757               case ATOM:
758 // =============================================================================
759 //  1 -  6        Record name   "ATOM  "
760 //  7 - 11        Integer       serial       Atom serial number.
761 // 13 - 16        Atom          name         Atom name.
762 // 17             Character     altLoc       Alternate location indicator.
763 // 18 - 20        Residue name  resName      Residue name.
764 // 22             Character     chainID      Chain identifier.
765 // 23 - 26        Integer       resSeq       Residue sequence number.
766 // 27             AChar         iCode        Code for insertion of residues.
767 // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
768 // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
769 // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
770 // 55 - 60        Real(6.2)     occupancy    Occupancy.
771 // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
772 // 77 - 78        LString(2)    element      Element symbol, right-justified.
773 // 79 - 80        LString(2)    charge       Charge  on the atom.
774 // =============================================================================
775                 String name;
776                 String resName;
777                 String segID;
778                 int resSeq;
779                 boolean printAtom;
780                 double[] d;
781                 double occupancy;
782                 double tempFactor;
783                 Atom newAtom;
784                 Atom returnedAtom;
785                 // If it's a misnamed water, it will fall through to HETATM.
786                 if (!line.substring(17, 20).trim().equals("HOH")) {
787                   serial = Hybrid36.decode(5, line.substring(6, 11));
788                   name = line.substring(12, 16).trim();
789                   if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
790                           || name.toUpperCase().contains("3H")) {
791                     // VERSION3_2 is presently just a placeholder for "anything non-standard".
792                     fileStandard = VERSION3_2;
793                   }
794                   altLoc = line.substring(16, 17).toUpperCase().charAt(0);
795                   if (!altLocs.contains(altLoc)) {
796                     altLocs.add(altLoc);
797                   }
798 
799                   if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
800                     break;
801                   }
802 
803                   resName = line.substring(17, 20).trim();
804                   chainID = line.substring(21, 22).charAt(0);
805                   segID = getSegID(chainID);
806                   resSeq = Hybrid36.decode(4, line.substring(22, 26));
807 
808                   char insertionCode = line.charAt(26);
809                   if (insertionCode != ' ' && !containsInsCode) {
810                     containsInsCode = true;
811                     logger.warning(
812                             " FFX support for files with " + "insertion codes is experimental. "
813                                     + "Residues will be renumbered to " + "eliminate insertion codes (52A "
814                                     + "becomes 53, 53 becomes 54, etc)");
815                   }
816 
817                   int offset = insertionCodeCount.getOrDefault(chainID, 0);
818                   String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
819                   if (!pdbToNewResMap.containsKey(pdbResNum)) {
820                     if (insertionCode != ' ') {
821                       ++offset;
822                       insertionCodeCount.put(chainID, offset);
823                     }
824                     resSeq += offset;
825                     if (offset != 0) {
826                       logger.info(
827                               format(" Chain %c " + "residue %s-%s renumbered to %c %s-%d", chainID,
828                                       pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
829                     }
830                     String newNum = format("%c%d", chainID, resSeq);
831                     pdbToNewResMap.put(pdbResNum, newNum);
832                   } else {
833                     resSeq += offset;
834                   }
835 
836                   printAtom = false;
837                   if (mutate) {
838                     boolean doBreak = false;
839                     for (Mutation mtn : mutations) {
840                       if (chainID == mtn.chainChar && resSeq == mtn.resID) {
841                         mtn.origResName = resName;
842                         resName = mtn.resName;
843                         String atomName = name.toUpperCase();
844 
845                         int isAA = AminoAcidUtils.getAminoAcidNumber(resName);
846                         int isNA = NucleicAcidUtils.getNucleicAcidNumber(resName);
847 
848                         if ((isNA != -1 && naBackboneNames.contains(atomName)) || (isAA != -1 && backboneNames.contains(atomName))) {
849                           printAtom = true;
850                         } else {
851                           // grab pur-pur or pyr-pyr alchem. atoms
852                           ArrayList<String> alchAtoms = mtn.getAlchemicalAtoms(false);
853                           if (alchAtoms == null) {
854                             // test to see if atom is involved in glycosyl torsion and if needs renaming (pyr-pur/pur-pyr)
855                             String newName = mtn.isNonAlchemicalAtom(atomName);
856                             if (newName != null) { // if not null -- use name it
857                               printAtom = true;
858                               if (newName.startsWith("~")) { // switch from purine to pyrmidine or v.v.
859                                 // switch name and include it as an alchemical atom
860                                 name = newName.substring(1);
861                                 logger.info(format(" DELETING atom %d %s of %s %d in chain %s", serial, atomName, resName, resSeq, chainID));
862                               } else {
863                                 // replace name but do not include as an alchemical atom
864                                 name = newName;
865                               }
866                               doBreak = false;
867                             } else if (!atomName.contains("'")) {
868                               logger.info(format(" DELETING atom %d %s of %s %d in chain %s", serial, atomName, resName, resSeq, chainID));
869                               doBreak = true;
870                             } else {
871                               printAtom = true;
872                               doBreak = false;
873                             }
874                           } else {
875                             if (alchAtoms.contains(atomName) && !atomName.contains("'")) {
876                               logger.info(format(" DELETING atom %d %s of %s %d in chain %s", serial, atomName, resName, resSeq, chainID));
877                               doBreak = true;
878                             } else {
879                               printAtom = true;
880                               doBreak = false;
881                             }
882                           }
883                           break;
884                         }
885                       }
886                     }
887                     if (doBreak) {
888                       break;
889                     }
890                   }
891 
892                   if (constantPH) {
893                     AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
894                     if (constantPHResidueMap.containsKey(aa3)) {
895                       String atomName = name.toUpperCase();
896                       AminoAcid3 aa3PH = constantPHResidueMap.get(aa3);
897                       resName = aa3PH.name();
898                       if (constantPhBackboneNames.contains(atomName)) {
899                         logger.info(format(" %s-%d %s", resName, resSeq, atomName));
900                       } else if (!atomName.startsWith("H")) {
901                         logger.info(format(" %s-%d %s", resName, resSeq, atomName));
902                       } else {
903                         logger.info(format(" %s-%d %s skipped", resName, resSeq, atomName));
904                         break;
905                       }
906                     }
907                   } else if (rotamerTitration) {
908                     AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
909                     if (rotamerResidueMap.containsKey(aa3) && resNumberList.contains(resSeq)) {
910                       AminoAcid3 aa3rotamer = rotamerResidueMap.get(aa3);
911                       resName = aa3rotamer.name();
912                     }
913                   }
914                   d = new double[3];
915                   d[0] = parseDouble(line.substring(30, 38).trim());
916                   d[1] = parseDouble(line.substring(38, 46).trim());
917                   d[2] = parseDouble(line.substring(46, 54).trim());
918                   occupancy = 1.0;
919                   tempFactor = 1.0;
920                   try {
921                     occupancy = parseDouble(line.substring(54, 60).trim());
922                     tempFactor = parseDouble(line.substring(60, 66).trim());
923                   } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
924                     // Use default values.
925                     if (printMissingFields) {
926                       logger.info(" Missing occupancy and b-factors set to 1.0.");
927                       printMissingFields = false;
928                     } else if (logger.isLoggable(Level.FINE)) {
929                       logger.fine(" Missing occupancy and b-factors set to 1.0.");
930                     }
931                   }
932 
933                   double bfactor = properties.getDouble("reset-bfactors", -1.0);
934                   if (bfactor >= 0.0) {
935                     tempFactor = bfactor;
936                   }
937 
938                   newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
939 
940                   // Check if this is a modified residue.
941                   if (modRes.containsKey(resName.toUpperCase())) {
942                     newAtom.setModRes(true);
943                   }
944                   returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
945                   if (returnedAtom != newAtom) {
946                     // A previously added atom has been retained.
947                     atoms.put(serial, returnedAtom);
948                     if (logger.isLoggable(Level.FINE)) {
949                       logger.fine(returnedAtom + " has been retained over\n" + newAtom);
950                     }
951                   } else {
952                     // The new atom has been added.
953                     atoms.put(serial, newAtom);
954                     // Check if the newAtom took the xyzIndex of a previous alternate conformer.
955                     if (newAtom.getIndex() == 0) {
956                       newAtom.setXyzIndex(xyzIndex++);
957                     }
958                     if (printAtom) {
959                       logger.info(newAtom.toString());
960                     }
961                   }
962                   break;
963                 }
964                 break;
965               case HETATM:
966 // =============================================================================
967 //  1 - 6        Record name    "HETATM"
968 //  7 - 11       Integer        serial        Atom serial number.
969 // 13 - 16       Atom           name          Atom name.
970 // 17            Character      altLoc        Alternate location indicator.
971 // 18 - 20       Residue name   resName       Residue name.
972 // 22            Character      chainID       Chain identifier.
973 // 23 - 26       Integer        resSeq        Residue sequence number.
974 // 27            AChar          iCode         Code for insertion of residues.
975 // 31 - 38       Real(8.3)      x             Orthogonal coordinates for X.
976 // 39 - 46       Real(8.3)      y             Orthogonal coordinates for Y.
977 // 47 - 54       Real(8.3)      z             Orthogonal coordinates for Z.
978 // 55 - 60       Real(6.2)      occupancy     Occupancy.
979 // 61 - 66       Real(6.2)      tempFactor    Temperature factor.
980 // 77 - 78       LString(2)     element       Element symbol; right-justified.
981 // 79 - 80       LString(2)     charge        Charge on the atom.
982 // =============================================================================
983                 serial = Hybrid36.decode(5, line.substring(6, 11));
984                 name = line.substring(12, 16).trim();
985                 altLoc = line.substring(16, 17).toUpperCase().charAt(0);
986                 if (!altLocs.contains(altLoc)) {
987                   altLocs.add(altLoc);
988                 }
989                 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
990                   break;
991                 }
992                 // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
993                 //  break;
994                 //}
995                 resName = line.substring(17, 20).trim();
996                 chainID = line.substring(21, 22).charAt(0);
997                 segID = getSegID(chainID);
998                 resSeq = Hybrid36.decode(4, line.substring(22, 26));
999 
1000                 char insertionCode = line.charAt(26);
1001                 if (insertionCode != ' ' && !containsInsCode) {
1002                   containsInsCode = true;
1003                   logger.warning(" FFX support for files with " + "insertion codes is experimental. "
1004                           + "Residues will be renumbered to " + "eliminate insertion codes (52A "
1005                           + "becomes 53, 53 becomes 54, etc)");
1006                 }
1007 
1008                 int offset = insertionCodeCount.getOrDefault(chainID, 0);
1009                 String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
1010                 if (!pdbToNewResMap.containsKey(pdbResNum)) {
1011                   if (insertionCode != ' ') {
1012                     ++offset;
1013                     insertionCodeCount.put(chainID, offset);
1014                   }
1015                   resSeq += offset;
1016                   if (offset != 0) {
1017                     logger.info(
1018                             format(" Chain %c " + "molecule %s-%s renumbered to %c %s-%d", chainID,
1019                                     pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
1020                   }
1021                   String newNum = format("%c%d", chainID, resSeq);
1022                   pdbToNewResMap.put(pdbResNum, newNum);
1023                 } else {
1024                   resSeq += offset;
1025                 }
1026 
1027                 d = new double[3];
1028                 d[0] = parseDouble(line.substring(30, 38).trim());
1029                 d[1] = parseDouble(line.substring(38, 46).trim());
1030                 d[2] = parseDouble(line.substring(46, 54).trim());
1031                 occupancy = 1.0;
1032                 tempFactor = 1.0;
1033                 try {
1034                   occupancy = parseDouble(line.substring(54, 60).trim());
1035                   tempFactor = parseDouble(line.substring(60, 66).trim());
1036                 } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
1037                   // Use default values.
1038                   if (printMissingFields) {
1039                     logger.info(" Missing occupancy and b-factors set to 1.0.");
1040                     printMissingFields = false;
1041                   } else if (logger.isLoggable(Level.FINE)) {
1042                     logger.fine(" Missing occupancy and b-factors set to 1.0.");
1043                   }
1044                 }
1045 
1046                 double bfactor = properties.getDouble("reset-bfactors", -1.0);
1047                 if (bfactor >= 0.0) {
1048                   tempFactor = bfactor;
1049                 }
1050 
1051                 newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
1052                 newAtom.setHetero(true);
1053                 // Check if this is a modified residue.
1054                 if (modRes.containsKey(resName.toUpperCase())) {
1055                   newAtom.setModRes(true);
1056                 }
1057                 returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
1058                 if (returnedAtom != newAtom) {
1059                   // A previously added atom has been retained.
1060                   atoms.put(serial, returnedAtom);
1061                   if (logger.isLoggable(Level.FINE)) {
1062                     logger.fine(returnedAtom + " has been retained over\n" + newAtom);
1063                   }
1064                 } else {
1065                   // The new atom has been added.
1066                   atoms.put(serial, newAtom);
1067                   newAtom.setXyzIndex(xyzIndex++);
1068                 }
1069                 break;
1070               case CRYST1:
1071 // =============================================================================
1072 // The CRYST1 record presents the unit cell parameters, space group, and Z
1073 // value. If the structure was not determined by crystallographic means, CRYST1
1074 // simply provides the unitary values, with an appropriate REMARK.
1075 //
1076 //  7 - 15       Real(9.3)     a              a (Angstroms).
1077 // 16 - 24       Real(9.3)     b              b (Angstroms).
1078 // 25 - 33       Real(9.3)     c              c (Angstroms).
1079 // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1080 // 41 - 47       Real(7.2)     beta           beta (degrees).
1081 // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1082 // 56 - 66       LString       sGroup         Space  group.
1083 // 67 - 70       Integer       z              Z value.
1084 // =============================================================================
1085                 if (line.length() < 55) {
1086                   logger.severe(" CRYST1 record is improperly formatted.");
1087                 }
1088                 double aaxis = parseDouble(line.substring(6, 15).trim());
1089                 double baxis = parseDouble(line.substring(15, 24).trim());
1090                 double caxis = parseDouble(line.substring(24, 33).trim());
1091                 double alpha = parseDouble(line.substring(33, 40).trim());
1092                 double beta = parseDouble(line.substring(40, 47).trim());
1093                 double gamma = parseDouble(line.substring(47, 54).trim());
1094                 int limit = min(line.length(), 66);
1095                 String sg = line.substring(55, limit).trim();
1096                 properties.addProperty("a-axis", aaxis);
1097                 properties.addProperty("b-axis", baxis);
1098                 properties.addProperty("c-axis", caxis);
1099                 properties.addProperty("alpha", alpha);
1100                 properties.addProperty("beta", beta);
1101                 properties.addProperty("gamma", gamma);
1102                 properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1103                 break;
1104               case CONECT:
1105 // =============================================================================
1106 //  7 - 11        Integer        serial       Atom  serial number
1107 // 12 - 16        Integer        serial       Serial number of bonded atom
1108 // 17 - 21        Integer        serial       Serial number of bonded atom
1109 // 22 - 26        Integer        serial       Serial number of bonded atom
1110 // 27 - 31        Integer        serial       Serial number of bonded atom
1111 //
1112 // CONECT records involving atoms for which the coordinates are not present
1113 // in the entry (e.g., symmetry-generated) are not given.
1114 // CONECT records involving atoms for which the coordinates are missing due
1115 // to disorder, are also not provided.
1116 // =============================================================================
1117                 conects.add(line);
1118                 break;
1119               case LINK:
1120 // =============================================================================
1121 // The LINK records specify connectivity between residues that is not implied by
1122 // the primary structure. Connectivity is expressed in terms of the atom names.
1123 // They also include the distance associated with each linkage following the
1124 // symmetry operations at the end of each record.
1125 // 13 - 16         Atom           name1           Atom name.
1126 // 17              Character      altLoc1         Alternate location indicator.
1127 // 18 - 20         Residue name   resName1        Residue  name.
1128 // 22              Character      chainID1        Chain identifier.
1129 // 23 - 26         Integer        resSeq1         Residue sequence number.
1130 // 27              AChar          iCode1          Insertion code.
1131 // 43 - 46         Atom           name2           Atom name.
1132 // 47              Character      altLoc2         Alternate location indicator.
1133 // 48 - 50         Residue name   resName2        Residue name.
1134 // 52              Character      chainID2        Chain identifier.
1135 // 53 - 56         Integer        resSeq2         Residue sequence number.
1136 // 57              AChar          iCode2          Insertion code.
1137 // 60 - 65         SymOP          sym1            Symmetry operator atom 1.
1138 // 67 - 72         SymOP          sym2            Symmetry operator atom 2.
1139 // 74 – 78         Real(5.2)      Length          Link distance
1140 // =============================================================================
1141                 char a1 = line.charAt(16);
1142                 char a2 = line.charAt(46);
1143                 if (a1 != a2) {
1144                   // logger.info(format(" Ignoring LINK record as alternate locations do not match\n
1145                   // %s.", line));
1146                   break;
1147                 }
1148                 if (currentAltLoc == 'A') {
1149                   if ((a1 == ' ' || a1 == 'A') && (a2 == ' ' || a2 == 'A')) {
1150                     links.add(line);
1151                   }
1152                 } else if (a1 == currentAltLoc && a2 == currentAltLoc) {
1153                   links.add(line);
1154                 }
1155                 break;
1156               case SSBOND:
1157 // =============================================================================
1158 // The SSBOND record identifies each disulfide bond in protein and polypeptide
1159 // structures by identifying the two residues involved in the bond.
1160 // The disulfide bond distance is included after the symmetry operations at
1161 // the end of the SSBOND record.
1162 //
1163 //  8 - 10        Integer         serNum       Serial number.
1164 // 12 - 14        LString(3)      "CYS"        Residue name.
1165 // 16             Character       chainID1     Chain identifier.
1166 // 18 - 21        Integer         seqNum1      Residue sequence number.
1167 // 22             AChar           icode1       Insertion code.
1168 // 26 - 28        LString(3)      "CYS"        Residue name.
1169 // 30             Character       chainID2     Chain identifier.
1170 // 32 - 35        Integer         seqNum2      Residue sequence number.
1171 // 36             AChar           icode2       Insertion code.
1172 // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
1173 // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
1174 // 74 – 78        Real(5.2)      Length        Disulfide bond distance
1175 //
1176 // If SG of cysteine is disordered then there are possible alternate linkages.
1177 // wwPDB practice is to put together all possible SSBOND records. This is
1178 // problematic because the alternate location identifier is not specified in
1179 // the SSBOND record.
1180 //
1181 // Notes:
1182 // SSBOND records may be invalid if chain IDs are reused.
1183 // SSBOND records are applied by FFX to all conformers.
1184 // =============================================================================
1185                 ssbonds.add(line);
1186                 break;
1187               case HELIX:
1188 // =============================================================================
1189 // HELIX records are used to identify the position of helices in the molecule.
1190 // Helices are named, numbered, and classified by type. The residues where the
1191 // helix begins and ends are noted, as well as the total length.
1192 //
1193 //  8 - 10        Integer        serNum        Serial number of the helix. This starts
1194 //                                             at 1  and increases incrementally.
1195 // 12 - 14        LString(3)     helixID       Helix  identifier. In addition to a serial
1196 //                                             number, each helix is given an
1197 //                                             alphanumeric character helix identifier.
1198 // 16 - 18        Residue name   initResName   Name of the initial residue.
1199 // 20             Character      initChainID   Chain identifier for the chain containing
1200 //                                             this  helix.
1201 // 22 - 25        Integer        initSeqNum    Sequence number of the initial residue.
1202 // 26             AChar          initICode     Insertion code of the initial residue.
1203 // 28 - 30        Residue  name  endResName    Name of the terminal residue of the helix.
1204 // 32             Character      endChainID    Chain identifier for the chain containing
1205 //                                             this  helix.
1206 // 34 - 37        Integer        endSeqNum     Sequence number of the terminal residue.
1207 // 38             AChar          endICode      Insertion code of the terminal residue.
1208 // 39 - 40        Integer        helixClass    Helix class (see below).
1209 // 41 - 70        String         comment       Comment about this helix.
1210 // 72 - 76        Integer        length        Length of this helix.
1211 //
1212 //                                      CLASS NUMBER
1213 // TYPE OF  HELIX                     (COLUMNS 39 - 40)
1214 // --------------------------------------------------------------
1215 // Right-handed alpha (default)                1
1216 // Right-handed omega                          2
1217 // Right-handed pi                             3
1218 // Right-handed gamma                          4
1219 // Right-handed 3 - 10                         5
1220 // Left-handed alpha                           6
1221 // Left-handed omega                           7
1222 // Left-handed gamma                           8
1223 // 2 - 7 ribbon/helix                          9
1224 // Polyproline                                10
1225 // =============================================================================
1226               case SHEET:
1227 // =============================================================================
1228 // SHEET records are used to identify the position of sheets in the molecule.
1229 // Sheets are both named and numbered. The residues where the sheet begins and
1230 // ends are noted.
1231 //
1232 //  8 - 10        Integer       strand         Strand  number which starts at 1 for each
1233 //                                             strand within a sheet and increases by one.
1234 // 12 - 14        LString(3)    sheetID        Sheet  identifier.
1235 // 15 - 16        Integer       numStrands     Number  of strands in sheet.
1236 // 18 - 20        Residue name  initResName    Residue  name of initial residue.
1237 // 22             Character     initChainID    Chain identifier of initial residue in strand.
1238 // 23 - 26        Integer       initSeqNum     Sequence number of initial residue in strand.
1239 // 27             AChar         initICode      Insertion code of initial residue in  strand.
1240 // 29 - 31        Residue name  endResName     Residue name of terminal residue.
1241 // 33             Character     endChainID     Chain identifier of terminal residue.
1242 // 34 - 37        Integer       endSeqNum      Sequence number of terminal residue.
1243 // 38             AChar         endICode       Insertion code of terminal residue.
1244 // 39 - 40        Integer       sense          Sense of strand with respect to previous
1245 //                                             strand in the sheet. 0 if first strand,
1246 //                                             1 if  parallel,and -1 if anti-parallel.
1247 // 42 - 45        Atom          curAtom        Registration.  Atom name in current strand.
1248 // 46 - 48        Residue name  curResName     Registration.  Residue name in current strand
1249 // 50             Character     curChainId     Registration. Chain identifier in current strand.
1250 // 51 - 54        Integer       curResSeq      Registration.  Residue sequence number
1251 //                                             in current strand.
1252 // 55             AChar         curICode       Registration. Insertion code in current strand.
1253 // 57 - 60        Atom          prevAtom       Registration.  Atom name in previous strand.
1254 // 61 - 63        Residue name  prevResName    Registration.  Residue name in previous strand.
1255 // 65             Character     prevChainId    Registration.  Chain identifier in previous strand.
1256 // 66 - 69        Integer       prevResSeq     Registration. Residue sequence number
1257 //                                             in previous strand.
1258 // 70             AChar         prevICode      Registration.  Insertion code in
1259 //                                             previous strand.
1260 // =============================================================================
1261                 structs.add(line);
1262                 break;
1263               case MODEL: // Currently, no handling in initial read.
1264                 break;
1265               case MTRIX1:
1266 // ================================================================================
1267 // MTRIXn (n = 1, 2, or 3) records present transformations expressing
1268 // non-crystallographic symmetry.
1269 // MTRIXn will appear only when such transformations are required to generate an
1270 // entire asymmetric unit,
1271 // such as a large viral structure.
1272 //
1273 //  8 - 10        Integer       serial         Serial number.
1274 // 11 - 20        Real(10.6)    m[n][1]        Mn1
1275 // 21 - 30        Real(10.6)    m[n][2]        Mn2
1276 // 31 - 40        Real(10.6)    m[n][3]        Mn3
1277 // 21 - 30        Real(10.6)    v[n]           Vn
1278 // 60             Integer       iGiven         1 if coordinates for the representations which are
1279 //                                              approximately related by the transformations of the
1280 //                                              molecule are contained in the entry. Otherwise, blank.
1281 // =================================================================================
1282                 StringBuilder MTRX1 = new StringBuilder(line.substring(11, 55));
1283                 properties.addProperty("MTRIX1", MTRX1);
1284                 break;
1285               case MTRIX2:
1286                 StringBuilder MTRX2 = new StringBuilder(line.substring(11, 55));
1287                 properties.addProperty("MTRIX2", MTRX2);
1288                 break;
1289               case MTRIX3:
1290                 StringBuilder MTRX3 = new StringBuilder(line.substring(11, 55));
1291                 properties.addProperty("MTRIX3", MTRX3);
1292                 break;
1293               case REMARK:
1294                 remarkLines.add(line.trim());
1295                 if (line.contains("Lambda:")) {
1296                   Matcher m = lambdaPattern.matcher(line);
1297                   if (m.find()) {
1298                     lastReadLambda = Double.parseDouble(m.group(1));
1299                   }
1300                 }
1301 // =================================================================================
1302 // REMARK 350: presents all transformations, both crystallographic and non-crystallographic,
1303 // needed to generate the biomolecule. These transformations operate on the coordinates in the
1304 // entry. Both author and computational descriptions of assemblies are provided, if applicable.
1305 // For strict ncs case where more than one assembly presents in asymmetric unit, only one
1306 // chain with unit matrix will reported in REMARK 350, the other chain will be generated
1307 // by rotation and translation.
1308 //
1309 // 20 - 23        Integer       serial         Serial number.
1310 // 24 - 33        Real(10.6)    m[n][1]        Mn1
1311 // 34 - 43        Real(10.6)    m[n][2]        Mn2
1312 // 44 - 53        Real(10.6)    m[n][3]        Mn3
1313 // 59 - 68        Real(10.6)    v[n]           Vn
1314 // =================================================================================
1315                 if (line.length() >= 68) {
1316                   String remarkType = line.substring(7, 10).trim();
1317                   if (remarkType.matches("\\d+") && parseInt(remarkType) == 350 && line.substring(13,
1318                           18).equalsIgnoreCase("BIOMT")) {
1319                     properties.addProperty("BIOMTn", new StringBuilder(line.substring(24, 68)));
1320                   }
1321                 }
1322                 break;
1323               default:
1324                 break;
1325             }
1326             line = br.readLine();
1327           }
1328 
1329         } catch (FileNotFoundException fileNotFoundException) {
1330           logger.log(Level.SEVERE, " PDB file not found", fileNotFoundException);
1331         }
1332       }
1333       xyzIndex--;
1334       setFileRead(true);
1335     } catch (IOException e) {
1336       logger.exiting(PDBFilter.class.getName(), "readFile", e);
1337       return false;
1338     }
1339 
1340     // Locate disulfide bonds; bond parameters are assigned below.
1341     List<Bond> ssBondList = locateDisulfideBonds(ssbonds, activeMolecularAssembly, pdbToNewResMap);
1342 
1343     // Record the number of atoms read in from the PDB file before applying
1344     // algorithms that may build new atoms.
1345     int pdbAtoms = activeMolecularAssembly.getAtomArray().length;
1346     removeExcessHydrogens();
1347     // Build missing backbone atoms in loops.
1348     buildMissingResidues(xyzIndex, activeMolecularAssembly, seqRes, dbRef);
1349 
1350     // Assign atom types. Missing side-chains atoms and missing hydrogen will be built in.
1351     bondList = assignAtomTypes(activeMolecularAssembly, fileStandard);
1352 
1353     // Assign disulfide bonds parameters and log their creation.
1354     buildDisulfideBonds(ssBondList, activeMolecularAssembly, bondList);
1355 
1356     // Finally, re-number the atoms if missing atoms were created.
1357     int currentN = activeMolecularAssembly.getAtomArray().length;
1358     boolean renumber = forceField.getBoolean("renumber-pdb", false);
1359     if (pdbAtoms != currentN) {
1360       logger.info(format(" Renumbering PDB file due to built atoms (%d vs %d)", currentN, pdbAtoms));
1361       numberAtoms(activeMolecularAssembly);
1362     } else if (renumber) {
1363       logger.info(" Renumbering PDB file due to renumber-pdb flag.");
1364       numberAtoms(activeMolecularAssembly);
1365     }
1366     return true;
1367   }
1368 
1369   public void removeExcessHydrogens(){
1370     logger.info(" Removing excess Hydrogens");
1371     for(Residue residue: activeMolecularAssembly.getResidueList()){
1372       if(residue.getName().equals("ACE") || residue.getName().equals("NME") || residue.getResidueType() != Residue.ResidueType.AA){
1373         break;
1374       }
1375       String trueResName = residue.getAtomByName("CA", true).getResidueName();
1376       Atom atom;
1377       switch (trueResName) {
1378         case "HID", "GLU" -> {
1379           // No HE2
1380           atom = residue.getAtomByName("HE2", true);
1381         }
1382         case "HIE" -> {
1383           // No HD1
1384           atom = residue.getAtomByName("HD1", true);
1385         }
1386         case "ASP" -> {
1387           // No HD2
1388           atom = residue.getAtomByName("HD2", true);
1389         }
1390         case "LYD" -> {
1391           // No HZ3
1392           atom = residue.getAtomByName("HZ3", true);
1393         }
1394         case "CYD" -> {
1395           // No HG
1396           atom = residue.getAtomByName("HG", true);
1397         }
1398         default -> {
1399           atom = null;
1400         }
1401         // Do nothing.
1402       }
1403       if(atom != null){
1404         int index = activeMolecularAssembly.getResidueList().indexOf(residue);
1405         MSNode atoms = residue.getAtomNode();
1406         atoms.remove(atom);
1407         residue.setName(trueResName);
1408         activeMolecularAssembly.getResidueList().set(index, residue);
1409       }
1410     }
1411   }
1412 
1413   /** {@inheritDoc} */
1414   @Override
1415   public boolean readNext() {
1416     return readNext(false);
1417   }
1418 
1419   /** {@inheritDoc} */
1420   @Override
1421   public boolean readNext(boolean resetPosition) {
1422     return readNext(resetPosition, false);
1423   }
1424 
1425   /** {@inheritDoc} */
1426   @Override
1427   public boolean readNext(boolean resetPosition, boolean print) {
1428     return readNext(resetPosition, print, true);
1429   }
1430 
1431   /** {@inheritDoc} */
1432   @Override
1433   @SuppressWarnings("fallthrough")
1434   public boolean readNext(boolean resetPosition, boolean print, boolean parse) {
1435     modelsRead = resetPosition ? 1 : modelsRead + 1;
1436     if (!parse) {
1437       if (print) {
1438         logger.info(format(" Skipped Model %d.", modelsRead));
1439       }
1440       return true;
1441     }
1442     remarkLines = new ArrayList<>(remarkLines.size());
1443     // ^ is beginning of line, \\s+ means "one or more whitespace", (\\d+) means match and capture
1444     // one or more digits.
1445     Pattern modelPatt = Pattern.compile("^MODEL\\s+(\\d+)");
1446     boolean eof = true;
1447     for (MolecularAssembly system : systems) {
1448       try {
1449         BufferedReader currentReader;
1450         if (readers.containsKey(system)) {
1451           currentReader = readers.get(system);
1452           try {
1453             if (!currentReader.ready()) {
1454               currentReader = new BufferedReader(new FileReader(readFile));
1455               // Mark the start of the file.
1456               currentReader.mark(0);
1457               readers.remove(system);
1458               readers.put(system, currentReader);
1459             } else if (resetPosition) {
1460               // If the BufferedReader has been opened, and reset is requested, reset the position.
1461               currentReader.reset();
1462             }
1463           } catch (Exception exception) {
1464             // If all structures in the PDB file have been read, the currentReader may have closed.
1465             // The try block will catch this case and reset to the beginning of the file.
1466             currentReader = new BufferedReader(new FileReader(readFile));
1467             // Mark the start of the file.
1468             currentReader.mark(0);
1469             readers.remove(system);
1470             readers.put(system, currentReader);
1471           }
1472         } else {
1473           currentReader = new BufferedReader(new FileReader(readFile));
1474           // Mark the start of the file.
1475           currentReader.mark(0);
1476           readers.put(system, currentReader);
1477         }
1478 
1479         // Skip to appropriate model.
1480         String line = currentReader.readLine();
1481         while (line != null) {
1482           line = line.trim();
1483           Matcher m = modelPatt.matcher(line);
1484           if (m.find()) {
1485             int modelNum = parseInt(m.group(1));
1486             if (modelNum == modelsRead) {
1487               if (print) {
1488                 logger.log(Level.INFO, format(" Reading model %d for %s", modelNum, currentFile));
1489               }
1490               eof = false;
1491               break;
1492             }
1493           }
1494           line = currentReader.readLine();
1495         }
1496         if (eof) {
1497           if (logger.isLoggable(Level.FINEST)) {
1498             logger.log(Level.FINEST, format("\n End of file reached for %s", readFile));
1499           }
1500           currentReader.close();
1501           return false;
1502         }
1503 
1504         // Begin parsing the model.
1505         currentChainID = null;
1506         currentSegID = null;
1507         boolean modelDone = false;
1508         line = currentReader.readLine();
1509         while (line != null) {
1510           line = line.trim();
1511           String recID = line.substring(0, Math.min(6, line.length())).trim();
1512           try {
1513             Record record = Record.valueOf(recID);
1514             boolean hetatm = true;
1515             switch (record) {
1516               // =============================================================================
1517               //
1518               //  7 - 11        Integer       serial       Atom serial number.
1519               // 13 - 16        Atom          name         Atom name.
1520               // 17             Character     altLoc       Alternate location indicator.
1521               // 18 - 20        Residue name  resName      Residue name.
1522               // 22             Character     chainID      Chain identifier.
1523               // 23 - 26        Integer       resSeq       Residue sequence number.
1524               // 27             AChar         iCode        Code for insertion of residues.
1525               // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in
1526               // Angstroms.
1527               // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in
1528               // Angstroms.
1529               // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in
1530               // Angstroms.
1531               // 55 - 60        Real(6.2)     occupancy    Occupancy.
1532               // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
1533               // 77 - 78        LString(2)    element      Element symbol, right-justified.
1534               // 79 - 80        LString(2)    charge       Charge  on the atom.
1535               // =============================================================================
1536               //         1         2         3         4         5         6         7
1537               // 123456789012345678901234567890123456789012345678901234567890123456789012345678
1538               // ATOM      1  N   ILE A  16      60.614  71.140 -10.592  1.00  7.38           N
1539               // ATOM      2  CA  ILE A  16      60.793  72.149  -9.511  1.00  6.91           C
1540               case ATOM:
1541                 hetatm = false;
1542               case HETATM:
1543                 String name = line.substring(12, 16).trim();
1544                 if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
1545                         || name.toUpperCase().contains("3H")) {
1546                   // VERSION3_2 is presently just a placeholder for "anything non-standard".
1547                   fileStandard = VERSION3_2;
1548                 }
1549                 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
1550                 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
1551                   break;
1552                 }
1553                 // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
1554                 //  break;
1555                 // }
1556                 String resName = line.substring(17, 20).trim();
1557                 Character chainID = line.substring(21, 22).charAt(0);
1558                 String segID = getExistingSegID(chainID);
1559                 int resSeq = Hybrid36.decode(4, line.substring(22, 26));
1560                 double[] d = new double[3];
1561                 d[0] = parseDouble(line.substring(30, 38).trim());
1562                 d[1] = parseDouble(line.substring(38, 46).trim());
1563                 d[2] = parseDouble(line.substring(46, 54).trim());
1564                 double occupancy = 1.0;
1565                 double tempFactor = 1.0;
1566                 Atom newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy,
1567                         tempFactor, segID);
1568                 newAtom.setHetero(hetatm);
1569                 // Check if this is a modified residue.
1570                 if (modRes.containsKey(resName.toUpperCase())) {
1571                   newAtom.setModRes(true);
1572                 }
1573 
1574                 Atom returnedAtom = activeMolecularAssembly.findAtom(newAtom);
1575                 if (returnedAtom != null) {
1576                   returnedAtom.setXYZ(d);
1577                   double[] retXYZ = new double[3];
1578                   returnedAtom.getXYZ(retXYZ);
1579                 } else {
1580                   String message = format(" Could not find atom %s in assembly", newAtom);
1581                   if (dieOnMissingAtom) {
1582                     logger.severe(message);
1583                   } else {
1584                     logger.warning(message);
1585                   }
1586                 }
1587                 break;
1588               case CRYST1:
1589                 // =============================================================================
1590                 // The CRYST1 record presents the unit cell parameters, space group, and Z
1591                 // value. If the structure was not determined by crystallographic means, CRYST1
1592                 // simply provides the unitary values, with an appropriate REMARK.
1593                 //
1594                 //  7 - 15       Real(9.3)     a              a (Angstroms).
1595                 // 16 - 24       Real(9.3)     b              b (Angstroms).
1596                 // 25 - 33       Real(9.3)     c              c (Angstroms).
1597                 // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1598                 // 41 - 47       Real(7.2)     beta           beta (degrees).
1599                 // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1600                 // 56 - 66       LString       sGroup         Space  group.
1601                 // 67 - 70       Integer       z              Z value.
1602                 // =============================================================================
1603                 logger.fine(" Crystal record found.");
1604                 if (line.length() < 55) {
1605                   logger.severe(" CRYST1 record is improperly formatted.");
1606                 }
1607                 double aaxis = parseDouble(line.substring(6, 15).trim());
1608                 double baxis = parseDouble(line.substring(15, 24).trim());
1609                 double caxis = parseDouble(line.substring(24, 33).trim());
1610                 double alpha = parseDouble(line.substring(33, 40).trim());
1611                 double beta = parseDouble(line.substring(40, 47).trim());
1612                 double gamma = parseDouble(line.substring(47, 54).trim());
1613                 int limit = min(line.length(), 66);
1614                 String sg = line.substring(55, limit).trim();
1615 //                properties.clearProperty("a-axis");
1616 //                properties.clearProperty("b-axis");
1617 //                properties.clearProperty("c-axis");
1618 //                properties.clearProperty("alpha");
1619 //                properties.clearProperty("beta");
1620 //                properties.clearProperty("gamma");
1621 //                properties.clearProperty("spacegroup");
1622 //
1623 //                properties.addProperty("a-axis", aaxis);
1624 //                properties.addProperty("b-axis", baxis);
1625 //                properties.addProperty("c-axis", caxis);
1626 //                properties.addProperty("alpha", alpha);
1627 //                properties.addProperty("beta", beta);
1628 //                properties.addProperty("gamma", gamma);
1629 //                properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1630                 Crystal crystal = activeMolecularAssembly.getCrystal();
1631                 SpaceGroup spaceGroup = SpaceGroupDefinitions.spaceGroupFactory(sg);
1632                 if (Objects.equals(crystal.spaceGroup.shortName, spaceGroup.shortName)) {
1633                   crystal.changeUnitCellParameters(aaxis, baxis, caxis, alpha, beta, gamma);
1634                 } else {
1635                   // TODO: Handle changes in space groups... Means recalculating force field terms.
1636                   logger.warning(format(" Original space group %s could not be changed to %s",
1637                           crystal.spaceGroup.shortName, spaceGroup.shortName));
1638                 }
1639                 break;
1640               case ENDMDL:
1641               case END: // Technically speaking, END should be at the end of the file, not end of
1642                 // the model.
1643                 logger.log(Level.FINE, format(" Model %d successfully read", modelsRead));
1644                 modelDone = true;
1645                 break;
1646               case REMARK:
1647                 remarkLines.add(line.trim());
1648                 if (line.contains("Lambda:")) {
1649                   Matcher m = lambdaPattern.matcher(line);
1650                   if (m.find()) {
1651                     lastReadLambda = Double.parseDouble(m.group(1));
1652                   }
1653                 }
1654                 break;
1655               default:
1656                 break;
1657             }
1658           } catch (Exception ex) {
1659             // Do nothing; it's not an ATOM/HETATM line.
1660           }
1661           if (modelDone) {
1662             break;
1663           }
1664           line = currentReader.readLine();
1665         }
1666         return true;
1667       } catch (IOException ex) {
1668         logger.info(
1669                 format(" Exception in parsing frame %d of %s:" + " %s", modelsRead, system.toString(),
1670                         ex));
1671       }
1672     }
1673     return false;
1674   }
1675 
1676   /**
1677    * Specify the alternate location.
1678    *
1679    * @param molecularAssembly The MolecularAssembly to populate.
1680    * @param altLoc The alternate location to use.
1681    */
1682   public void setAltID(MolecularAssembly molecularAssembly, Character altLoc) {
1683     setMolecularSystem(molecularAssembly);
1684     currentAltLoc = altLoc;
1685   }
1686 
1687   /**
1688    * Sets whether this PDBFilter should log each time it saves to a file.
1689    *
1690    * @param logWrites a boolean.
1691    */
1692   public void setLogWrites(boolean logWrites) {
1693     this.logWrites = logWrites;
1694   }
1695 
1696   /**
1697    * setModelNumbering.
1698    *
1699    * @param modelsWritten the number of models written.
1700    */
1701   public void setModelNumbering(int modelsWritten) {
1702     this.modelsWritten = modelsWritten;
1703   }
1704 
1705   public void setLMN(int[] lmn) {
1706     if(lmn[0] >= 1 && lmn[1] >= 1 && lmn[2] >= 1){
1707       this.lmn = lmn;
1708     }else{
1709       // Provided dimensions are not handled. Revert to P1.
1710       this.lmn = new int[]{1,1,1};
1711     }
1712   }
1713 
1714   /**
1715    * setSymOp.
1716    *
1717    * @param symOp a int.
1718    */
1719   public void setSymOp(int symOp) {
1720     this.nSymOp = symOp;
1721   }
1722 
1723   /**
1724    * Expand the current system to P1 during the save operation.
1725    *
1726    * @param file The file to write.
1727    * @return Return true on a successful write.
1728    */
1729   public boolean writeFileAsP1(File file) {
1730     // XYZ File First Line
1731     final int l = lmn[0];
1732     final int m = lmn[1];
1733     final int n = lmn[2];
1734     final int numReplicates = l * m * n;
1735     Crystal crystal = activeMolecularAssembly.getCrystal();
1736     int nSymOps = crystal.getUnitCell().spaceGroup.getNumberOfSymOps();
1737 
1738     if (nSymOps == 1 && l <= 1 && m <= 1 && n <= 1) {
1739       // This is a P1 system.
1740       if (!writeFile(file, false)) {
1741         logger.info(format(" Save failed for %s", activeMolecularAssembly));
1742         return false;
1743       } else {
1744         return true;
1745       }
1746     } else {
1747       Polymer[] polymers = activeMolecularAssembly.getChains();
1748       int chainCount = 0;
1749       for (Polymer polymer : polymers) {
1750         Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1751         polymer.setChainID(chainID);
1752         polymer.setSegID(chainID.toString());
1753       }
1754       nSymOp = 0;
1755       logWrites = false;
1756       boolean writeEnd = false;
1757       if (!writeFile(file, false, false, writeEnd)) {
1758         logger.info(format(" Save failed for %s", activeMolecularAssembly));
1759         return false;
1760       } else {
1761         for (int i = 0; i < l; i++) {
1762           for (int j = 0; j < m; j++) {
1763             for (int k = 0; k < n; k++) {
1764               lValue = i;
1765               mValue = j;
1766               nValue = k;
1767               for (int iSym = 0; iSym < nSymOps; iSym++) {
1768                 nSymOp = iSym;
1769                 for (Polymer polymer : polymers) {
1770                   Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1771                   polymer.setChainID(chainID);
1772                   polymer.setSegID(chainID.toString());
1773                 }
1774                 // If the last sym op to be written.
1775                 writeEnd = iSym == nSymOps - 1 && i == l - 1 && j == m - 1 && k == n - 1;
1776                 if (!writeFile(file, true, false, writeEnd)) {
1777                   logger.info(format(" Save failed for %s", activeMolecularAssembly));
1778                   return false;
1779                 }
1780               }
1781             }
1782           }
1783         }
1784       }
1785 
1786       // Reset the chainIDs.
1787       chainCount = 0;
1788       for (Polymer polymer : polymers) {
1789         Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1790         polymer.setChainID(chainID);
1791         polymer.setSegID(chainID.toString());
1792       }
1793 
1794     }
1795 
1796     return true;
1797   }
1798 
1799   /**
1800    * writeFile
1801    *
1802    * @param saveFile a {@link java.io.File} object.
1803    * @param append Whether to append to saveFile (vs over-write).
1804    * @param printLinear Ignored (remains to present a different method signature).
1805    * @param writeEnd True if this is the final model.
1806    * @return Success of writing.
1807    */
1808   public boolean writeFile(File saveFile, boolean append, boolean printLinear, boolean writeEnd) {
1809     return writeFile(saveFile, append, Collections.emptySet(), writeEnd, true);
1810   }
1811 
1812   /**
1813    * writeFile
1814    *
1815    * @param saveFile a {@link java.io.File} object to save to.
1816    * @param append Whether to append to saveFile (vs over-write).
1817    * @param toExclude A {@link java.util.Set} of {@link ffx.potential.bonded.Atom}s to exclude
1818    *     from writing.
1819    * @param writeEnd True if this is the final model.
1820    * @param versioning True if the file being saved to should be versioned. False if the file
1821    *     being saved to should be overwritten.
1822    * @return Success of writing.
1823    */
1824   public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1825                            boolean versioning) {
1826     return writeFile(saveFile, append, toExclude, writeEnd, versioning, null);
1827   }
1828 
1829   /**
1830    * writeFile
1831    *
1832    * @param saveFile a {@link java.io.File} object to save to.
1833    * @param append Whether to append to saveFile (vs over-write).
1834    * @param toExclude A {@link java.util.Set} of {@link ffx.potential.bonded.Atom}s to exclude
1835    *     from writing.
1836    * @param writeEnd True if this is the final model.
1837    * @param versioning True if the file being saved to should be versioned. False if the file
1838    *     being saved to should be overwritten.
1839    * @param extraLines Extra comment/header lines to write.
1840    * @return Success of writing.
1841    */
1842   public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1843                            boolean versioning, String[] extraLines) {
1844     // Set standardize atom names to false in the presence of deuterium
1845     List<Atom> deuteriumAtoms = new ArrayList<>();
1846     for(Atom atom: activeMolecularAssembly.getAtomArray()){
1847       if(atom.getName().startsWith("D")){
1848         String name = atom.getName().replace("D","H");
1849         atom.setName(name);
1850         deuteriumAtoms.add(atom);
1851       }
1852     }
1853     if (standardizeAtomNames) {
1854       logger.info(" Setting atom names to PDB standard.");
1855       renameAtomsToPDBStandard(activeMolecularAssembly);
1856     }
1857 
1858     for(Atom atom: activeMolecularAssembly.getAtomArray()){
1859       if(deuteriumAtoms.contains(atom) && atom.getName().startsWith("H")){
1860         String name = atom.getName().replace("H","D");
1861         atom.setName(name);
1862       }
1863     }
1864     final Set<Atom> atomExclusions = toExclude == null ? Collections.emptySet() : toExclude;
1865     if (saveFile == null) {
1866       return false;
1867     }
1868     if (vdwH) {
1869       logger.info(" Saving hydrogen to van der Waals centers instead of nuclear locations.");
1870     }
1871     if (nSymOp > -1) {
1872       logger.info(format(" Saving atoms using the symmetry operator:\n%s\n",
1873               activeMolecularAssembly.getCrystal().getUnitCell().spaceGroup.getSymOp(nSymOp)
1874                       .toString()));
1875     }
1876 
1877     // Create StringBuilders for ATOM, ANISOU and TER records that can be reused.
1878     StringBuilder sb = new StringBuilder("ATOM  ");
1879     StringBuilder anisouSB = new StringBuilder("ANISOU");
1880     StringBuilder terSB = new StringBuilder("TER   ");
1881     StringBuilder model = null;
1882     for (int i = 6; i < 80; i++) {
1883       sb.append(' ');
1884       anisouSB.append(' ');
1885       terSB.append(' ');
1886     }
1887 
1888     File newFile = saveFile;
1889     if (!append) {
1890       if (versioning) {
1891         newFile = version(saveFile);
1892       }
1893     } else if (modelsWritten >= 0) {
1894       model = new StringBuilder(format("MODEL     %-4d", ++modelsWritten));
1895       model.append(repeat(" ", 65));
1896     }
1897     activeMolecularAssembly.setFile(newFile);
1898     if (activeMolecularAssembly.getName() == null) {
1899       activeMolecularAssembly.setName(newFile.getName());
1900     }
1901     if (logWrites) {
1902       logger.log(Level.INFO, " Saving {0}", newFile.getName());
1903     }
1904 
1905     try (FileWriter fw = new FileWriter(newFile, append);
1906          BufferedWriter bw = new BufferedWriter(fw)) {
1907       /*
1908        Will come before CRYST1 and ATOM records, but after anything
1909        written by writeFileWithHeader (particularly X-ray refinement statistics).
1910       */
1911       String[] headerLines = activeMolecularAssembly.getHeaderLines();
1912       for (String line : headerLines) {
1913         bw.write(format("%s\n", line));
1914       }
1915       if (extraLines != null) {
1916         if (rotamerTitration && extraLines[0].contains("REMARK")) {
1917           for (String line : extraLines) {
1918             bw.write(line + "\n");
1919           }
1920         } else {
1921           for (String line : extraLines) {
1922             bw.write(format("REMARK 999 %s\n", line));
1923           }
1924         }
1925       }
1926       if (model != null) {
1927         bw.write(model.toString());
1928         bw.newLine();
1929       }
1930       // =============================================================================
1931       // The CRYST1 record presents the unit cell parameters, space group, and Z
1932       // value. If the structure was not determined by crystallographic means, CRYST1
1933       // simply provides the unitary values, with an appropriate REMARK.
1934       //
1935       //  7 - 15       Real(9.3)     a              a (Angstroms).
1936       // 16 - 24       Real(9.3)     b              b (Angstroms).
1937       // 25 - 33       Real(9.3)     c              c (Angstroms).
1938       // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1939       // 41 - 47       Real(7.2)     beta           beta (degrees).
1940       // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1941       // 56 - 66       LString       sGroup         Space  group.
1942       // 67 - 70       Integer       z              Z value.
1943       // =============================================================================
1944       if (nSymOp < 0) {
1945         // Write out the unit cell.
1946         Crystal crystal = activeMolecularAssembly.getCrystal();
1947         if (crystal != null && !crystal.aperiodic()) {
1948           Crystal c = crystal.getUnitCell();
1949           if (lmn[0] > 0 || lmn[1] > 0 || lmn[2] > 0) {
1950             c.a = c.a * lmn[0];
1951             c.b = c.b * lmn[1];
1952             c.c = c.c * lmn[2];
1953           }
1954           bw.write(c.toCRYST1());
1955         }
1956       } else if (nSymOp == 0) {
1957         // Write a P1 cell.
1958         Crystal crystal = activeMolecularAssembly.getCrystal();
1959         if (crystal != null && !crystal.aperiodic()) {
1960           Crystal c = crystal.getUnitCell();
1961           Crystal p1 = new Crystal((lmn[0]>0)? c.a * lmn[0] : c.a, (lmn[1]>0)? c.b * lmn[1] : c.b, (lmn[2]>0)? c.c * lmn[2] : c.c, c.alpha, c.beta, c.gamma, "P1");
1962           bw.write(p1.toCRYST1());
1963         }
1964       }
1965       // =============================================================================
1966       // The SSBOND record identifies each disulfide bond in protein and polypeptide
1967       // structures by identifying the two residues involved in the bond.
1968       // The disulfide bond distance is included after the symmetry operations at
1969       // the end of the SSBOND record.
1970       //
1971       //  8 - 10        Integer         serNum       Serial number.
1972       // 12 - 14        LString(3)      "CYS"        Residue name.
1973       // 16             Character       chainID1     Chain identifier.
1974       // 18 - 21        Integer         seqNum1      Residue sequence number.
1975       // 22             AChar           icode1       Insertion code.
1976       // 26 - 28        LString(3)      "CYS"        Residue name.
1977       // 30             Character       chainID2     Chain identifier.
1978       // 32 - 35        Integer         seqNum2      Residue sequence number.
1979       // 36             AChar           icode2       Insertion code.
1980       // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
1981       // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
1982       // 74 – 78        Real(5.2)      Length        Disulfide bond distance
1983       //
1984       // If SG of cysteine is disordered then there are possible alternate linkages.
1985       // wwPDB practice is to put together all possible SSBOND records. This is
1986       // problematic because the alternate location identifier is not specified in
1987       // the SSBOND record.
1988       // =============================================================================
1989       int serNum = 1;
1990       Polymer[] polymers = activeMolecularAssembly.getChains();
1991       if (polymers != null) {
1992         for (Polymer polymer : polymers) {
1993           List<Residue> residues = polymer.getResidues();
1994           for (Residue residue : residues) {
1995             if (residue.getName().equalsIgnoreCase("CYS")) {
1996               List<Atom> cysAtoms = residue.getAtomList().stream()
1997                       .filter(a -> !atomExclusions.contains(a)).toList();
1998               Atom SG1 = null;
1999               for (Atom atom : cysAtoms) {
2000                 String atName = atom.getName().toUpperCase();
2001                 if (atName.equals("SG") || atName.equals("SH")
2002                         || atom.getAtomType().atomicNumber == 16) {
2003                   SG1 = atom;
2004                   break;
2005                 }
2006               }
2007               List<Bond> bonds = SG1.getBonds();
2008               for (Bond bond : bonds) {
2009                 Atom SG2 = bond.get1_2(SG1);
2010                 if (SG2.getAtomType().atomicNumber == 16 && !atomExclusions.contains(SG2)) {
2011                   if (SG1.getIndex() < SG2.getIndex()) {
2012                     bond.energy(false);
2013                     bw.write(format("SSBOND %3d CYS %1s %4s    CYS %1s %4s %36s %5.2f\n", serNum++,
2014                             SG1.getChainID().toString(), Hybrid36.encode(4, SG1.getResidueNumber()),
2015                             SG2.getChainID().toString(), Hybrid36.encode(4, SG2.getResidueNumber()), "",
2016                             bond.getValue()));
2017                   }
2018                 }
2019               }
2020             }
2021           }
2022         }
2023       }
2024       // =============================================================================
2025       //
2026       //  7 - 11        Integer       serial       Atom serial number.
2027       // 13 - 16        Atom          name         Atom name.
2028       // 17             Character     altLoc       Alternate location indicator.
2029       // 18 - 20        Residue name  resName      Residue name.
2030       // 22             Character     chainID      Chain identifier.
2031       // 23 - 26        Integer       resSeq       Residue sequence number.
2032       // 27             AChar         iCode        Code for insertion of residues.
2033       // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
2034       // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
2035       // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
2036       // 55 - 60        Real(6.2)     occupancy    Occupancy.
2037       // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
2038       // 77 - 78        LString(2)    element      Element symbol, right-justified.
2039       // 79 - 80        LString(2)    charge       Charge  on the atom.
2040       // =============================================================================
2041       //         1         2         3         4         5         6         7
2042       // 123456789012345678901234567890123456789012345678901234567890123456789012345678
2043       // ATOM      1  N   ILE A  16      60.614  71.140 -10.592  1.00  7.38           N
2044       // ATOM      2  CA  ILE A  16      60.793  72.149  -9.511  1.00  6.91           C
2045       MolecularAssembly[] molecularAssemblies = this.getMolecularAssemblyArray();
2046       int serial = 1;
2047       if (nSymOp > 0) {
2048         serial = serialP1;
2049       }
2050 
2051       // Loop over biomolecular chains
2052       if (polymers != null) {
2053         for (Polymer polymer : polymers) {
2054           currentSegID = polymer.getName();
2055           currentChainID = polymer.getChainID();
2056           sb.setCharAt(21, currentChainID);
2057           // Loop over residues
2058           List<Residue> residues = polymer.getResidues();
2059           for (Residue residue : residues) {
2060             String resName = residue.getName();
2061             if (resName.length() > 3) {
2062               resName = resName.substring(0, 3);
2063             }
2064             int resID = residue.getResidueNumber();
2065             sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2066             sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2067             // Loop over atoms
2068             List<Atom> residueAtoms = residue.getAtomList().stream()
2069                     .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2070             boolean altLocFound = false;
2071             for (Atom atom : residueAtoms) {
2072               if (mutate) {
2073                 for (Mutation mtn : mutations) {
2074                   if (resID == mtn.resID && currentChainID == mtn.chainChar) {
2075                     mtn.addTors(atom, serial);
2076                     ArrayList<String> alchAtoms = mtn.getAlchemicalAtoms(true);
2077                     if (alchAtoms != null) {
2078                       if (residue.getBackboneAtoms().contains(atom) && alchAtoms.contains(atom.getName())) {
2079                         logger.info(format(" MUTATION atom is %d chain %s",serial, currentChainID));
2080                       }
2081                     } else {
2082                       // treating pur-pyr or pyr-pur N9/N1 & C2/C4 as alchemical
2083                       if (residue.getBackboneAtoms().contains(atom)) {
2084                         logger.info(format(" MUTATION atom is %d chain %s",serial, currentChainID));
2085                       }
2086                     }
2087                   }
2088                 }
2089               }
2090               writeAtom(atom, serial++, sb, anisouSB, bw);
2091               Character altLoc = atom.getAltLoc();
2092               if (altLoc != null && !altLoc.equals(' ')) {
2093                 altLocFound = true;
2094               }
2095             }
2096             // Write out alternate conformers
2097             if (altLocFound) {
2098               for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2099                 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2100                 Polymer altPolymer = altMolecularAssembly.getPolymer(currentChainID, currentSegID, false);
2101                 Residue altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
2102                 if (altResidue == null) {
2103                   resName = AminoAcid3.UNK.name();
2104                   altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
2105                 }
2106                 residueAtoms = altResidue.getAtomList().stream()
2107                         .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2108                 for (Atom atom : residueAtoms) {
2109                   if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2110                           .equals('A')) {
2111                     sb.replace(17, 20, padLeft(atom.getResidueName().toUpperCase(), 3));
2112                     writeAtom(atom, serial++, sb, anisouSB, bw);
2113                   }
2114                 }
2115               }
2116             }
2117           }
2118           terSB.replace(6, 11, format("%5s", Hybrid36.encode(5, serial++)));
2119           terSB.replace(12, 16, "    ");
2120           terSB.replace(16, 26, sb.substring(16, 26));
2121           bw.write(terSB.toString());
2122           bw.newLine();
2123         }
2124       }
2125       sb.replace(0, 6, "HETATM");
2126       sb.setCharAt(21, 'A');
2127 
2128       Character chainID = 'A';
2129       if (polymers != null) {
2130         chainID = polymers[0].getChainID();
2131       }
2132       activeMolecularAssembly.setChainIDAndRenumberMolecules(chainID);
2133 
2134       // Loop over molecules, ions and then water.
2135       List<MSNode> molecules = activeMolecularAssembly.getMolecules();
2136       int numMolecules = molecules.size();
2137       for (int i = 0; i < numMolecules; i++) {
2138         Molecule molecule = (Molecule) molecules.get(i);
2139         chainID = molecule.getChainID();
2140         sb.setCharAt(21, chainID);
2141         String resName = molecule.getResidueName();
2142         int resID = molecule.getResidueNumber();
2143         if (resName.length() > 3) {
2144           resName = resName.substring(0, 3);
2145         }
2146         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2147         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2148         // List<Atom> moleculeAtoms = molecule.getAtomList();
2149         List<Atom> moleculeAtoms = molecule.getAtomList().stream()
2150                 .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2151         boolean altLocFound = false;
2152         for (Atom atom : moleculeAtoms) {
2153           writeAtom(atom, serial++, sb, anisouSB, bw);
2154           Character altLoc = atom.getAltLoc();
2155           if (altLoc != null && !altLoc.equals(' ')) {
2156             altLocFound = true;
2157           }
2158         }
2159         // Write out alternate conformers
2160         if (altLocFound) {
2161           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2162             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2163             MSNode altmolecule = altMolecularAssembly.getMolecules().get(i);
2164             moleculeAtoms = altmolecule.getAtomList();
2165             for (Atom atom : moleculeAtoms) {
2166               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2167                       .equals('A')) {
2168                 writeAtom(atom, serial++, sb, anisouSB, bw);
2169               }
2170             }
2171           }
2172         }
2173       }
2174 
2175       List<MSNode> ions = activeMolecularAssembly.getIons();
2176       for (int i = 0; i < ions.size(); i++) {
2177         Molecule ion = (Molecule) ions.get(i);
2178         chainID = ion.getChainID();
2179         sb.setCharAt(21, chainID);
2180         String resName = ion.getResidueName();
2181         int resID = ion.getResidueNumber();
2182         if (resName.length() > 3) {
2183           resName = resName.substring(0, 3);
2184         }
2185         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2186         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2187         // List<Atom> ionAtoms = ion.getAtomList();
2188         List<Atom> ionAtoms = ion.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2189                 .collect(Collectors.toList());
2190         boolean altLocFound = false;
2191         for (Atom atom : ionAtoms) {
2192           writeAtom(atom, serial++, sb, anisouSB, bw);
2193           Character altLoc = atom.getAltLoc();
2194           if (altLoc != null && !altLoc.equals(' ')) {
2195             altLocFound = true;
2196           }
2197         }
2198         // Write out alternate conformers
2199         if (altLocFound) {
2200           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2201             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2202             MSNode altion = altMolecularAssembly.getIons().get(i);
2203             ionAtoms = altion.getAtomList();
2204             for (Atom atom : ionAtoms) {
2205               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2206                       .equals('A')) {
2207                 writeAtom(atom, serial++, sb, anisouSB, bw);
2208               }
2209             }
2210           }
2211         }
2212       }
2213 
2214       List<MSNode> water = activeMolecularAssembly.getWater();
2215       for (int i = 0; i < water.size(); i++) {
2216         Molecule wat = (Molecule) water.get(i);
2217         chainID = wat.getChainID();
2218         sb.setCharAt(21, chainID);
2219         String resName = wat.getResidueName();
2220         int resID = wat.getResidueNumber();
2221         if (resName.length() > 3) {
2222           resName = resName.substring(0, 3);
2223         }
2224         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2225         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2226         List<Atom> waterAtoms = wat.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2227                 .collect(Collectors.toList());
2228         boolean altLocFound = false;
2229         for (Atom atom : waterAtoms) {
2230           writeAtom(atom, serial++, sb, anisouSB, bw);
2231           Character altLoc = atom.getAltLoc();
2232           if (altLoc != null && !altLoc.equals(' ')) {
2233             altLocFound = true;
2234           }
2235         }
2236         // Write out alternate conformers
2237         if (altLocFound) {
2238           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2239             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2240             MSNode altwater = altMolecularAssembly.getWater().get(i);
2241             waterAtoms = altwater.getAtomList();
2242             for (Atom atom : waterAtoms) {
2243               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2244                       .equals('A')) {
2245                 writeAtom(atom, serial++, sb, anisouSB, bw);
2246               }
2247             }
2248           }
2249         }
2250       }
2251 
2252       if (model != null) {
2253         bw.write("ENDMDL");
2254         bw.newLine();
2255       }
2256 
2257       if (writeEnd) {
2258         bw.write("END");
2259         bw.newLine();
2260       }
2261 
2262       if (nSymOp >= 0) {
2263         serialP1 = serial;
2264       }
2265 
2266     } catch (Exception e) {
2267       String message = "Exception writing to file: " + saveFile;
2268       logger.log(Level.WARNING, message, e);
2269       return false;
2270     }
2271     return true;
2272   }
2273 
2274   /**
2275    * {@inheritDoc}
2276    *
2277    * <p>Write out the Atomic information in PDB format.
2278    */
2279   @Override
2280   public boolean writeFile(File saveFile, boolean append) {
2281     return writeFile(saveFile, append, false, true);
2282   }
2283 
2284   public boolean writeFile(File saveFile, boolean append, String[] extraLines) {
2285     return writeFile(saveFile, append, Collections.emptySet(), false, !append, extraLines);
2286   }
2287 
2288   /**
2289    * Writes out the atomic information in PDB format.
2290    *
2291    * @param saveFile The file to save information to.
2292    * @param append True if the current data should be appended to the saveFile (as in arc
2293    *     files).
2294    * @param versioning True if the saveFile should be versioned. False if the saveFile should be
2295    *     overwritten.
2296    * @return Success of writing.
2297    */
2298   public boolean writeFile(File saveFile, boolean append, boolean versioning) {
2299     return writeFile(saveFile, append, Collections.emptySet(), true, versioning);
2300   }
2301 
2302   /**
2303    * writeFileWithHeader.
2304    *
2305    * @param saveFile a {@link java.io.File} object.
2306    * @param header a {@link java.lang.String} object.
2307    * @param append a boolean.
2308    * @return a boolean.
2309    */
2310   public boolean writeFileWithHeader(File saveFile, String header, boolean append) {
2311     // Set standardize atom names to false in the presence of deuterium
2312     List<Atom> deuteriumAtoms = new ArrayList<>();
2313     for(Atom atom: activeMolecularAssembly.getAtomArray()){
2314       if(atom.getName().startsWith("D")){
2315         String name = atom.getName().replace("D","H");
2316         atom.setName(name);
2317         deuteriumAtoms.add(atom);
2318       }
2319     }
2320     if (standardizeAtomNames) {
2321       logger.info(" Setting atom names to PDB standard.");
2322       renameAtomsToPDBStandard(activeMolecularAssembly);
2323     }
2324 
2325     for(Atom atom: activeMolecularAssembly.getAtomArray()){
2326       if(deuteriumAtoms.contains(atom) && atom.getName().startsWith("H")){
2327         String name = atom.getName().replace("H","D");
2328         atom.setName(name);
2329       }
2330     }
2331     activeMolecularAssembly.setFile(saveFile);
2332     activeMolecularAssembly.setName(saveFile.getName());
2333 
2334     try (FileWriter fw = new FileWriter(saveFile, append); BufferedWriter bw = new BufferedWriter(
2335             fw)) {
2336       bw.write(header);
2337       bw.newLine();
2338     } catch (Exception e) {
2339       String message = " Exception writing to file: " + saveFile;
2340       logger.log(Level.WARNING, message, e);
2341       return false;
2342     }
2343     if (writeFile(saveFile, true)) {
2344       logger.log(Level.INFO, " Wrote PDB to file {0}", saveFile.getPath());
2345       return true;
2346     } else {
2347       logger.log(Level.INFO, " Error writing to file {0}", saveFile.getPath());
2348       return false;
2349     }
2350   }
2351 
2352   /**
2353    * writeFileWithHeader.
2354    *
2355    * @param saveFile a {@link java.io.File} object.
2356    * @param header a {@link java.lang.String} object.
2357    * @return a boolean.
2358    */
2359   public boolean writeFileWithHeader(File saveFile, String header) {
2360     return writeFileWithHeader(saveFile, header, true);
2361   }
2362 
2363   /**
2364    * writeFileWithHeader.
2365    *
2366    * @param saveFile a {@link java.io.File} object.
2367    * @param header a {@link java.lang.StringBuilder} object.
2368    * @return a boolean.
2369    */
2370   public boolean writeFileWithHeader(File saveFile, StringBuilder header) {
2371     return writeFileWithHeader(saveFile, header.toString());
2372   }
2373 
2374   /**
2375    * Get unique SegID for possibly duplicate chain IDs.
2376    *
2377    * @param c chain ID just read.
2378    * @return a unique segID.
2379    */
2380   private String getExistingSegID(Character c) {
2381     if (c.equals(' ')) {
2382       c = 'A';
2383     }
2384 
2385     // If the chain ID has not changed, return the existing segID.
2386     if (c.equals(currentChainID)) {
2387       return currentSegID;
2388     } else {
2389       currentChainID = null;
2390     }
2391 
2392     List<String> segIDs = segidMap.get(c);
2393     if (segIDs != null) {
2394       if (segIDs.size() > 1) {
2395         if (currentSegID == null) {
2396           currentChainID = c;
2397           currentSegID = segIDs.get(0);
2398           return segIDs.get(0);
2399         } else if (currentSegID.length() == 1) {
2400           currentChainID = c;
2401           currentSegID = segIDs.get(1);
2402           return segIDs.get(1);
2403         } else if (currentSegID.length() == 2) {
2404           String s = currentSegID.substring(0,1);
2405           int num = -2;
2406           try {
2407             num = Integer.parseInt(s);
2408           } catch (NumberFormatException e) {
2409             logger.severe(" SegID of length 2 does not start with an integer.");
2410           }
2411           currentChainID = c;
2412           currentSegID = segIDs.get(num+1);
2413           return segIDs.get(num+1);
2414         } else {
2415           logger.info(" Too many repeated chains. Using single letter for segID.");
2416         }
2417       }
2418       return segIDs.get(0);
2419     } else {
2420       logger.log(Level.INFO, format(" Creating SegID for to chain %s", c));
2421       return getSegID(c);
2422     }
2423   }
2424 
2425   /**
2426    * Convert possibly duplicate chain IDs into unique segIDs.
2427    *
2428    * @param c chain ID just read.
2429    * @return a unique segID.
2430    */
2431   private String getSegID(Character c) {
2432     if (c.equals(' ')) {
2433       c = 'A';
2434     }
2435 
2436     // If the chain ID has not changed, return the existing segID.
2437     if (c.equals(currentChainID)) {
2438       return currentSegID;
2439     }
2440 
2441     // Loop through existing segIDs to find the first one that is unused.
2442     int count = 0;
2443     for (String segID : segIDs) {
2444       if (segID.endsWith(c.toString())) {
2445         count++;
2446       }
2447     }
2448 
2449     // If the count is greater than 0, then append it.
2450     String newSegID;
2451     if (count == 0) {
2452       newSegID = c.toString();
2453     } else {
2454       newSegID = count + c.toString();
2455     }
2456     segIDs.add(newSegID);
2457     currentChainID = c;
2458     currentSegID = newSegID;
2459 
2460     if (segidMap.containsKey(c)) {
2461       segidMap.get(c).add(newSegID);
2462     } else {
2463       List<String> newChainList = new ArrayList<>();
2464       newChainList.add(newSegID);
2465       segidMap.put(c, newChainList);
2466     }
2467 
2468     return newSegID;
2469   }
2470 
2471   /**
2472    * writeAtom
2473    *
2474    * @param atom a {@link ffx.potential.bonded.Atom} object.
2475    * @param serial a int.
2476    * @param sb a {@link java.lang.StringBuilder} object.
2477    * @param anisouSB a {@link java.lang.StringBuilder} object.
2478    * @param bw a {@link java.io.BufferedWriter} object.
2479    * @throws java.io.IOException if any.
2480    */
2481   private void writeAtom(Atom atom, int serial, StringBuilder sb, StringBuilder anisouSB,
2482                          BufferedWriter bw) throws IOException {
2483     String name = atom.getName();
2484     int nameLength = name.length();
2485     if (nameLength > 4) {
2486       name = name.substring(0, 4);
2487     } else if (nameLength == 1) {
2488       name = name + "  ";
2489     } else if (nameLength == 2) {
2490       if (atom.getAtomType().valence == 0) {
2491         name = name + "  ";
2492       } else {
2493         name = name + " ";
2494       }
2495     }
2496     double[] xyz = vdwH ? atom.getRedXYZ() : atom.getXYZ(null);
2497     if (nSymOp >= 0) {
2498       Crystal crystal = activeMolecularAssembly.getCrystal().getUnitCell();
2499       SymOp symOp = crystal.spaceGroup.getSymOp(nSymOp);
2500       double[] newXYZ = new double[xyz.length];
2501       crystal.applySymOp(xyz, newXYZ, symOp);
2502       if (lValue > 0 || mValue > 0 || nValue > 0) {
2503         double[] translation = new double[] {lValue, mValue, nValue};
2504         crystal.getUnitCell().toCartesianCoordinates(translation, translation);
2505         newXYZ[0] += translation[0];
2506         newXYZ[1] += translation[1];
2507         newXYZ[2] += translation[2];
2508       }
2509       xyz = newXYZ;
2510     }
2511     sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
2512     Character altLoc = atom.getAltLoc();
2513     sb.setCharAt(16, Objects.requireNonNullElse(altLoc, ' '));
2514 
2515 
2516     /*
2517      * On the following code:
2518      * #1: StringBuilder.replace will allow for longer strings, expanding the StringBuilder's length if necessary.
2519      *
2520      * #2: sb was never re-initialized, so if there was overflow,
2521      * sb would continue to be > 80 characters long, resulting in broken PDB files
2522      *
2523      * #3: It may be wiser to have XYZ coordinates result in shutdown, not
2524      * truncation of coordinates. #4: Excessive B-factors aren't much of an
2525      * issue; if the B-factor is past 999.99, that's the difference between
2526      * "density extends to Venus" and "density extends to Pluto".
2527      */
2528     StringBuilder decimals = new StringBuilder();
2529     for (int i = 0; i < 3; i++) {
2530       try {
2531         decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
2532       } catch (IllegalArgumentException ex) {
2533         String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
2534         logger.info(format(" XYZ %d coordinate %8.3f for atom %s "
2535                 + "overflowed bounds of 8.3f string specified by PDB "
2536                 + "format; truncating value to %s", i, xyz[i], atom, newValue));
2537         decimals.append(newValue);
2538       }
2539     }
2540     try {
2541       decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
2542     } catch (IllegalArgumentException ex) {
2543       logger.severe(
2544               format(" Occupancy %f for atom %s is impossible; " + "value must be between 0 and 1",
2545                       atom.getOccupancy(), atom));
2546     }
2547     try {
2548       decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
2549     } catch (IllegalArgumentException ex) {
2550       String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
2551       logger.info(format(" Atom temp factor %6.2f for atom %s overflowed "
2552                       + "bounds of 6.2f string specified by PDB format; truncating " + "value to %s",
2553               atom.getTempFactor(), atom, newValue));
2554       decimals.append(newValue);
2555     }
2556     sb.replace(30, 66, decimals.toString());
2557 
2558     name = Atom.ElementSymbol.values()[atom.getAtomicNumber() - 1].toString();
2559     name = name.toUpperCase();
2560     if (atom.isDeuterium()) {
2561       name = "D";
2562     }
2563     sb.replace(76, 78, padLeft(name, 2));
2564     sb.replace(78, 80, format("%2d", 0));
2565     bw.write(sb.toString());
2566     bw.newLine();
2567     // =============================================================================
2568     //  1 - 6        Record name   "ANISOU"
2569     //  7 - 11       Integer       serial         Atom serial number.
2570     // 13 - 16       Atom          name           Atom name.
2571     // 17            Character     altLoc         Alternate location indicator
2572     // 18 - 20       Residue name  resName        Residue name.
2573     // 22            Character     chainID        Chain identifier.
2574     // 23 - 26       Integer       resSeq         Residue sequence number.
2575     // 27            AChar         iCode          Insertion code.
2576     // 29 - 35       Integer       u[0][0]        U(1,1)
2577     // 36 - 42       Integer       u[1][1]        U(2,2)
2578     // 43 - 49       Integer       u[2][2]        U(3,3)
2579     // 50 - 56       Integer       u[0][1]        U(1,2)
2580     // 57 - 63       Integer       u[0][2]        U(1,3)
2581     // 64 - 70       Integer       u[1][2]        U(2,3)
2582     // 77 - 78       LString(2)    element        Element symbol, right-justified.
2583     // 79 - 80       LString(2)    charge         Charge on the atom.
2584     // =============================================================================
2585     double[] anisou = atom.getAnisou(null);
2586     if (anisou != null) {
2587       anisouSB.replace(6, 80, sb.substring(6, 80));
2588       anisouSB.replace(28, 70,
2589               format("%7d%7d%7d%7d%7d%7d", (int) (anisou[0] * 1e4), (int) (anisou[1] * 1e4),
2590                       (int) (anisou[2] * 1e4), (int) (anisou[3] * 1e4), (int) (anisou[4] * 1e4),
2591                       (int) (anisou[5] * 1e4)));
2592       bw.write(anisouSB.toString());
2593       bw.newLine();
2594     }
2595   }
2596 
2597   /** PDB records that are recognized. */
2598   private enum Record {
2599     ANISOU, ATOM, CONECT, CRYST1, DBREF, END, MODEL, ENDMDL, HELIX, HETATM, LINK, MTRIX1, MTRIX2, MTRIX3, MODRES, SEQRES, SHEET, SSBOND, REMARK
2600   }
2601 
2602   /** Presently, VERSION3_3 is default, and VERSION3_2 is anything non-standard. */
2603   public enum PDBFileStandard {
2604     VERSION2_3, VERSION3_0, VERSION3_1, VERSION3_2, VERSION3_3
2605   }
2606 
2607   public static class Mutation {
2608 
2609     /** Residue ID of the residue to mutate. */
2610     final int resID;
2611     /** Residue name after mutation. */
2612     final String resName;
2613     /** Character for the chain ID of the residue that will be mutated. */
2614     final char chainChar;
2615     /** Residue name before mutation. */
2616     String origResName;
2617     /** The 4 x,y,z glycosyl torsion atom coordinates. */
2618     double[][] glyco = new double[4][3];
2619     /** The 4 atom indices of glycosyl torsion atoms. */
2620     int[] glycoAtomIndex = new int[4];
2621     /** Index correction. */
2622     int indCorr;
2623 
2624     public Mutation(int resID, char chainChar, String newResName) {
2625       newResName = newResName.toUpperCase();
2626       if (newResName.length() != 3) {
2627         logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2628       }
2629       int isAA = AminoAcidUtils.getAminoAcidNumber(newResName);
2630       int isNA = NucleicAcidUtils.getNucleicAcidNumber(newResName);
2631       if (isAA == -1 && isNA == -1) {
2632         logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2633       }
2634       this.resID = resID;
2635       this.chainChar = chainChar;
2636       this.resName = newResName;
2637 
2638       String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2639       indCorr = alphabet.indexOf(chainChar);
2640     }
2641 
2642     /**
2643      * Check to see if an atom is involved in the mutated base's glycosyl torsion. If the mutation is a switch from
2644      * purine to pyrimidine or vice versa, it will return '~name', meaning the name should be replaced but to include it
2645      * as an alchemical atom.
2646      * @param atomName atom name to check
2647      * @return new name to use if it is involved in glycosyl torsion, null otherwise
2648      */
2649     public String isNonAlchemicalAtom(String atomName) {
2650       if (isWtPurine()) {
2651         if (atomName.equals("N9")) {
2652           if (isMtnPyrimidine()) {
2653             return "~N1";
2654           }
2655           return atomName;
2656         } else if (atomName.equals("C4")) {
2657           if (isMtnPyrimidine()) {
2658             return "~C2";
2659           }
2660           return atomName;
2661         }
2662         return null;
2663       }
2664 
2665       if (isWtPyrimidine()) {
2666         if (atomName.equals("N1")) {
2667           if (isMtnPurine()) {
2668             // here
2669             return "~N9";
2670           }
2671           return atomName;
2672         } else if (atomName.equals("C2")) {
2673           if (isMtnPurine()) {
2674             // here
2675             return "~C4";
2676           }
2677           return atomName;
2678         }
2679         return null;
2680       }
2681 
2682       return null;
2683     }
2684 
2685     /**
2686      * Determines what atoms should be alchemical for a purine to purine or pyrimidine to pyrimidine mutation.
2687      * @param isWriting true if writing the pdb, false if reading the pdb
2688      * @return ArrayList of alchemical atoms, null if not a pur-pur or pyr-pyr mutation
2689      */
2690     public ArrayList<String> getAlchemicalAtoms(boolean isWriting) {
2691       // Log warning that the mutation input is the same residue and return nothing so prev. functionality is not changed
2692       if (resName.equals(origResName)) {
2693         logger.severe("Desired Mutation residue is the same as the original.");
2694         return null;
2695       }
2696 
2697       boolean purpur;
2698       if (isMtnPurine() && isWtPurine()) {
2699         purpur = true;
2700       } else if (isMtnPyrimidine() && isWtPyrimidine()) {
2701         purpur = false;
2702       } else {
2703         // Return nothing so previous functionality is not changed
2704         return null;
2705       }
2706 
2707       String res;
2708 
2709       // look at the mutation residue if writing or the original (wild type) residue if reading
2710       if (isWriting) {
2711         res = resName;
2712       } else {
2713         res = origResName;
2714       }
2715 
2716       ArrayList<String> list = new ArrayList<>();
2717       if (purpur) { // purine: either A to G or G to A
2718         if (res.equals("DAD") || res.equals("DA")) { // MTN is A
2719           list.add("N6");
2720           list.add("H61");
2721           list.add("H62");
2722           list.add("H2");
2723         } else { // MTN is G
2724           list.add("H1");
2725           list.add("N2");
2726           list.add("H21");
2727           list.add("H22");
2728           list.add("O6");
2729         }
2730       } else { // pyrimidine: either T to C or C to T
2731         if (res.equals("DTY") || res.equals("DT")) { // MTN is T
2732           list.add("H3");
2733           list.add("O4");
2734           list.add("C7");
2735           list.add("H71");
2736           list.add("H72");
2737           list.add("H73");
2738         } else { // MTN is C
2739           list.add("N4");
2740           list.add("H41");
2741           list.add("H42");
2742           list.add("H5");
2743         }
2744       }
2745       return list;
2746     }
2747 
2748     /**
2749      * Determine if the mutation residue is purine.
2750      * @return true if mutation is purine
2751      */
2752     public boolean isMtnPurine() {
2753       return resName.equals("DA") || resName.equals("DG") || resName.equals("DAD") || resName.equals("DGU");
2754     }
2755 
2756     /**
2757      * Determine if the mutation residue is pyrimidine.
2758      * @return true if mutation is pyrimidine
2759      */
2760     public boolean isMtnPyrimidine() {
2761       return resName.equals("DC") || resName.equals("DT") || resName.equals("DCY") || resName.equals("DTY");
2762     }
2763 
2764     /**
2765      * Determine if original (wild type) residue is purine.
2766      * @return true if original residue is purine
2767      */
2768     public boolean isWtPurine() {
2769       return origResName.equals("DA") || origResName.equals("DG") || origResName.equals("DAD") || origResName.equals("DGU");
2770     }
2771 
2772     /**
2773      * Determine if original (wild type) residue is pyrimidine.
2774      * @return true if original residue is pyrimidine
2775      */
2776     public boolean isWtPyrimidine() {
2777       return origResName.equals("DT") || origResName.equals("DC") || origResName.equals("DTY") || origResName.equals("DCY");
2778     }
2779 
2780     public void addTors(Atom atom, int index) {
2781       String name = atom.getName();
2782       if (name.equals("O4'")) {
2783         atom.getXYZ(glyco[0]);
2784         glycoAtomIndex[0] = index - indCorr;
2785       } else if (name.equals("C1'")) {
2786         atom.getXYZ(glyco[1]);
2787         glycoAtomIndex[1] = index - indCorr;
2788       } else if (name.equals("N9") || (name.equals("N1") && isMtnPyrimidine())) {
2789         atom.getXYZ(glyco[2]);
2790         glycoAtomIndex[2] = index - indCorr;
2791       } else if ((name.equals("C4") && isMtnPurine()) || (name.equals("C2") && isMtnPyrimidine())) {
2792         atom.getXYZ(glyco[3]);
2793         glycoAtomIndex[3] = index - indCorr;
2794       }
2795     }
2796 
2797     public void calculateTorsion() {
2798       double tors = toDegrees(DoubleMath.dihedralAngle(glyco[0], glyco[1], glyco[2], glyco[3]));
2799       double delta;
2800       if (tors > 0) {
2801         delta = tors - 180.0;
2802       } else {
2803         delta = tors + 180.0;
2804       }
2805       logger.info(format("restrain-torsion %d %d %d %d  5.000  %f 1", glycoAtomIndex[0], glycoAtomIndex[1], glycoAtomIndex[2], glycoAtomIndex[3], delta));
2806     }
2807   }
2808 }