View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.parsers;
39  
40  import static ffx.potential.bonded.BondedUtils.numberAtoms;
41  import static ffx.potential.bonded.NamingUtils.renameAtomsToPDBStandard;
42  import static ffx.potential.bonded.PolymerUtils.assignAtomTypes;
43  import static ffx.potential.bonded.PolymerUtils.buildDisulfideBonds;
44  import static ffx.potential.bonded.PolymerUtils.buildMissingResidues;
45  import static ffx.potential.bonded.PolymerUtils.locateDisulfideBonds;
46  import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_2;
47  import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_3;
48  import static ffx.utilities.StringUtils.padLeft;
49  import static java.lang.Double.parseDouble;
50  import static java.lang.Integer.parseInt;
51  import static java.lang.String.format;
52  import static org.apache.commons.lang3.StringUtils.repeat;
53  import static org.apache.commons.math3.util.FastMath.min;
54  
55  import ffx.crystal.Crystal;
56  import ffx.crystal.SpaceGroup;
57  import ffx.crystal.SpaceGroupDefinitions;
58  import ffx.crystal.SpaceGroupInfo;
59  import ffx.crystal.SymOp;
60  import ffx.potential.MolecularAssembly;
61  import ffx.potential.Utilities.FileType;
62  import ffx.potential.bonded.*;
63  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
64  import ffx.potential.parameters.ForceField;
65  import ffx.utilities.Hybrid36;
66  import ffx.utilities.StringUtils;
67  import java.io.BufferedReader;
68  import java.io.BufferedWriter;
69  import java.io.File;
70  import java.io.FileNotFoundException;
71  import java.io.FileReader;
72  import java.io.FileWriter;
73  import java.io.IOException;
74  import java.util.ArrayList;
75  import java.util.Collections;
76  import java.util.HashMap;
77  import java.util.List;
78  import java.util.Map;
79  import java.util.Objects;
80  import java.util.OptionalDouble;
81  import java.util.Set;
82  import java.util.logging.Level;
83  import java.util.logging.Logger;
84  import java.util.regex.Matcher;
85  import java.util.regex.Pattern;
86  import java.util.stream.Collectors;
87  import org.apache.commons.configuration2.CompositeConfiguration;
88  
89  /**
90   * The PDBFilter class parses data from a Protein DataBank (*.PDB) file. The following records are
91   * recognized: ANISOU, ATOM, CONECT, CRYST1, END, HELIX, HETATM, LINK, SHEET, SSBOND, REMARK. The
92   * rest are currently ignored.
93   *
94   * @author Michael J. Schnieders
95   * @see <a href="http://www.wwpdb.org/documentation/format32/v3.2.html">PDB format 3.2</a>
96   * @since 1.0
97   */
98  public final class PDBFilter extends SystemFilter {
99  
100   private static final Logger logger = Logger.getLogger(PDBFilter.class.getName());
101   private static final Set<String> backboneNames;
102   private static final Set<String> constantPhBackboneNames;
103   private static final Set<String> naBackboneNames;
104 
105   static {
106     String[] names = {"C", "CA", "N", "O", "OXT", "OT2"};
107     backboneNames = Set.of(names);
108 
109     String[] constantPhNames = {"C", "CA", "N", "O", "OXT", "OT2", "H", "HA", "H1", "H2", "H3"};
110     constantPhBackboneNames = Set.of(constantPhNames);
111 
112     String[] naNames = {"P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'"};
113     naBackboneNames = Set.of(naNames);
114   }
115 
116   /** Map of SEQRES entries. */
117   private final Map<Character, String[]> seqRes = new HashMap<>();
118   /** Map of DBREF entries. */
119   private final Map<Character, int[]> dbRef = new HashMap<>();
120   /** List of altLoc characters seen in the PDB file. */
121   private final List<Character> altLocs = new ArrayList<>();
122   /**
123    * List of segIDs defined for the PDB file.
124    *
125    * <p>The expectation is for chain naming from A-Z, then from 0-9. For large systems, chain names
126    * are sometimes reused due to limitations in the PDB format.
127    *
128    * <p>However, we define segIDs to always be unique. For the first A-Z,0-9 series chainID ==
129    * segID. Then, for second A-Z,0-9 series, the segID = 1A-1Z,10-19, and for the third series segID
130    * = 2A-2Z,20-29, and so on.
131    */
132   private final List<String> segIDs = new ArrayList<>();
133 
134   private final Map<Character, List<String>> segidMap = new HashMap<>();
135   /** Maps a chain to the number of insertion codes encountered in that chain. */
136   private final Map<Character, Integer> insertionCodeCount = new HashMap<>();
137   /**
138    * Maps chainIDResNumInsCode to renumbered chainIDResNum. For example, residue 52A in chain C might
139    * be renumbered to residue 53, and mapped as "C52A" to "C53".
140    */
141   private final Map<String, String> pdbToNewResMap = new HashMap<>();
142   /** List of modified residues * */
143   private final Map<String, String> modRes = new HashMap<>();
144   /** Keep track of ATOM record serial numbers to match them with ANISOU records. */
145   private final HashMap<Integer, Atom> atoms = new HashMap<>();
146 
147   private final Map<MolecularAssembly, BufferedReader> readers = new HashMap<>();
148   /** The current altLoc - i.e., the one we are defining a chemical system for. */
149   private Character currentAltLoc = 'A';
150   /** Character for the current chain ID. */
151   private Character currentChainID = null;
152   /** String for the current SegID. */
153   private String currentSegID = null;
154   /** Flag to indicate a mutation is requested. */
155   private boolean mutate = false;
156   private List<Mutation> mutations = null;
157   private List<Integer> resNumberList = null;
158   /** Flag to indicate if missing fields should be printed (i.e. missing B-factors). */
159   private boolean printMissingFields = true;
160   /** Number of symmetry operators when expanding to a P1 unit cell (-1 saves as current spacegroup). */
161   private int nSymOp = -1;
162   /** Number of replicates in A lattice direction (-1 defaults to unit cell). */
163   private int lValue = -1;
164   /** Number of replicates in B lattice direction (-1 defaults to unit cell). */
165   private int mValue = -1;
166   /** Number of replicates in C lattice direction (-1 defaults to unit cell). */
167   private int nValue = -1;
168   /**
169    * The serial field continues from the previous asymmetric unit when expanding to P1. This is not
170    * used when saving as the current spacegroup.
171    */
172   private int serialP1 = 0;
173   /** Assume current standard. */
174   private PDBFileStandard fileStandard = VERSION3_3;
175   /** If false, skip logging "Saving file". */
176   private boolean logWrites = true;
177   /** Keep track of the current MODEL in the file. */
178   private int modelsRead = 1;
179   /** Tracks output MODEL numbers. Unused if below zero. */
180   private int modelsWritten = -1;
181   /** Replicates vector dimensions if saving as expanded. */
182   private int[] lmn = new int[]{1,1,1};
183   private String versionFileName;
184 
185   private final File readFile;
186   private List<String> remarkLines = Collections.emptyList();
187   private double lastReadLambda = Double.NaN;
188 
189   /**
190    * If true, read in titratable residues in their fully protonated form.
191    */
192   private boolean constantPH = false;
193   /**
194    * If true, read in titratable residues in their protonated form.
195    */
196   private boolean rotamerTitration = false;
197   /**
198    * List of residue to rename for constantPH simulations.
199    */
200   private static final HashMap<AminoAcid3, AminoAcid3> constantPHResidueMap = new HashMap<>();
201 
202   static {
203     // Lysine
204     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
205     // Cysteine
206     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
207     // Histidine
208     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
209     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
210     // Aspartate
211     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASD);
212     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASH, AminoAcidUtils.AminoAcid3.ASD);
213     // Glutamate
214     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLD);
215     constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLH, AminoAcidUtils.AminoAcid3.GLD);
216   }
217 
218   /**
219    * List of residue to rename for rotamer titration simulations.
220    */
221   private static final HashMap<AminoAcid3, AminoAcid3> rotamerResidueMap = new HashMap<>();
222 
223   static {
224     // Lysine
225     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
226     // Histidine
227     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
228     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
229     // Aspartate
230     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASH);
231     // Glutamate
232     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLH);
233     //Cysteine
234     rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
235   }
236 
237   /**
238    * Constructor for PDBFilter.
239    *
240    * @param files a {@link java.util.List} object.
241    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
242    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
243    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
244    *     object.
245    */
246   public PDBFilter(List<File> files, MolecularAssembly molecularAssembly, ForceField forceField,
247                    CompositeConfiguration properties) {
248     super(files, molecularAssembly, forceField, properties);
249     bondList = new ArrayList<>();
250     this.fileType = FileType.PDB;
251     readFile = files.get(0);
252   }
253 
254   /**
255    * Parse the PDB File from a URL.
256    *
257    * @param file a {@link java.io.File} object.
258    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
259    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
260    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
261    *     object.
262    */
263   public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
264                    CompositeConfiguration properties) {
265     super(file, molecularAssembly, forceField, properties);
266     bondList = new ArrayList<>();
267     this.fileType = FileType.PDB;
268     readFile = file;
269   }
270 
271   /**
272    * Parse the PDB File from a URL.
273    *
274    * @param file a {@link java.io.File} object.
275    * @param molecularAssemblies a {@link java.util.List} object.
276    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
277    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
278    *     object.
279    */
280   public PDBFilter(File file, List<MolecularAssembly> molecularAssemblies, ForceField forceField,
281                    CompositeConfiguration properties) {
282     super(file, molecularAssemblies, forceField, properties);
283     bondList = new ArrayList<>();
284     this.fileType = FileType.PDB;
285     readFile = file;
286   }
287 
288   /**
289    * Constructor for PDBFilter with residue numbers.
290    *
291    * @param file a {@link java.util.List} object.
292    * @param molecularAssembly a {@link ffx.potential.MolecularAssembly} object.
293    * @param forceField a {@link ffx.potential.parameters.ForceField} object.
294    * @param properties a {@link org.apache.commons.configuration2.CompositeConfiguration}
295    *     object.
296    * @param resNumberList a List of integer residue numbers for constant pH rotamer
297    *     optimization.
298    */
299   public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
300                    CompositeConfiguration properties, List<Integer> resNumberList) {
301     super(file, molecularAssembly, forceField, properties);
302     bondList = new ArrayList<>();
303     this.fileType = FileType.PDB;
304     this.readFile = file;
305     this.resNumberList = resNumberList;
306     //this.chainList = chainList;
307   }
308 
309 
310   /**
311    * Simple method useful for converting files to PDB format.
312    *
313    * @param atom a {@link ffx.potential.bonded.Atom} object.
314    * @return Returns a PDB ATOM or HETATM record for the passed Atom.
315    */
316   public static String toPDBAtomLine(Atom atom) {
317     StringBuilder sb;
318     if (atom.isHetero()) {
319       sb = new StringBuilder("HETATM");
320     } else {
321       sb = new StringBuilder("ATOM  ");
322     }
323     sb.append(repeat(" ", 74));
324 
325     String name = atom.getName();
326     int nameLength = name.length();
327     if (nameLength > 4) {
328       name = name.substring(0, 4);
329     } else if (nameLength == 1) {
330       name = name + "  ";
331     } else if (nameLength == 2) {
332       name = name + " ";
333     }
334     int serial = atom.getXyzIndex();
335     sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
336 
337     Character altLoc = atom.getAltLoc();
338     if (altLoc != null) {
339       sb.setCharAt(16, altLoc);
340     } else {
341       char blankChar = ' ';
342       sb.setCharAt(16, blankChar);
343     }
344 
345     String resName = atom.getResidueName();
346     sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
347 
348     char chain = atom.getChainID();
349     sb.setCharAt(21, chain);
350 
351     int resID = atom.getResidueNumber();
352     sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
353 
354     double[] xyz = atom.getXYZ(null);
355     StringBuilder decimals = new StringBuilder();
356     for (int i = 0; i < 3; i++) {
357       try {
358         decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
359       } catch (IllegalArgumentException ex) {
360         String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
361         logger.info(
362                 format(" XYZ coordinate %8.3f for atom %s overflowed PDB format and is truncated to %s.",
363                         xyz[i], atom, newValue));
364         decimals.append(newValue);
365       }
366     }
367     try {
368       decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
369     } catch (IllegalArgumentException ex) {
370       logger.severe(
371               format(" Occupancy %6.2f for atom %s must be between 0 and 1.", atom.getOccupancy(),
372                       atom));
373     }
374     try {
375       decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
376     } catch (IllegalArgumentException ex) {
377       String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
378       logger.info(
379               format(" B-factor %6.2f for atom %s overflowed the PDB format and is truncated to %s.",
380                       atom.getTempFactor(), atom, newValue));
381       decimals.append(newValue);
382     }
383 
384     sb.replace(30, 66, decimals.toString());
385     sb.replace(78, 80, format("%2d", 0));
386     sb.append("\n");
387     return sb.toString();
388   }
389 
390   public void setConstantPH(boolean constantPH) {
391     this.constantPH = constantPH;
392   }
393 
394   public void setRotamerTitration(boolean rotamerTitration) {
395     this.rotamerTitration = rotamerTitration;
396   }
397 
398   /** clearSegIDs */
399   public void clearSegIDs() {
400     segIDs.clear();
401   }
402 
403   /** {@inheritDoc} */
404   @Override
405   public void closeReader() {
406     for (MolecularAssembly system : systems) {
407       BufferedReader br = readers.get(system);
408       if (br != null) {
409         try {
410           br.close();
411         } catch (IOException ex) {
412           logger.warning(format(" Exception in closing system %s: %s", system.toString(), ex));
413         }
414       }
415     }
416   }
417 
418   @Override
419   public int countNumModels() {
420     Set<File> files = systems.stream().map(MolecularAssembly::getFile).map(File::toString).distinct()
421             .map(File::new).collect(Collectors.toSet());
422 
423     // Dangers of parallelism are minimized by: unique files/filenames, read-only access.
424     return files.parallelStream().mapToInt((File fi) -> {
425       int nModelsLocal = 0;
426       try (BufferedReader br = new BufferedReader(new FileReader(fi))) {
427         String line = br.readLine();
428         while (line != null) {
429           if (line.startsWith("MODEL")) {
430             ++nModelsLocal;
431           }
432           line = br.readLine();
433         }
434         nModelsLocal = Math.max(1, nModelsLocal);
435       } catch (IOException ex) {
436         logger.info(format(" Exception in parsing file %s: %s", fi, ex));
437       }
438       return nModelsLocal;
439     }).sum();
440   }
441 
442   /**
443    * Get the list of alternate locations encountered.
444    *
445    * @return the alternate location list.
446    */
447   public List<Character> getAltLocs() {
448     return altLocs;
449   }
450 
451   /** {@inheritDoc} */
452   @Override
453   public OptionalDouble getLastReadLambda() {
454     return Double.isNaN(lastReadLambda) ? OptionalDouble.empty() : OptionalDouble.of(lastReadLambda);
455   }
456 
457   /**
458    * Returns all the remark lines found by the last readFile call.
459    *
460    * @return Remark lines from the last readFile call.
461    */
462   @Override
463   public String[] getRemarkLines() {
464     int nRemarks = remarkLines.size();
465     return remarkLines.toArray(new String[nRemarks]);
466   }
467 
468   @Override
469   public int getSnapshot() {
470     return modelsRead;
471   }
472 
473   /**
474    * Mutate residue(s) as the PDB file is being parsed.
475    *
476    * @param mutations a {@link java.util.List} object.
477    */
478   public void mutate(List<Mutation> mutations) {
479     mutate = true;
480     if (this.mutations == null) {
481       this.mutations = new ArrayList<>();
482     }
483     this.mutations.addAll(mutations);
484   }
485 
486   /** Parse the PDB File */
487   @Override
488   public boolean readFile() {
489     remarkLines = new ArrayList<>();
490     // First atom is #1, to match xyz file format
491     int xyzIndex = 1;
492     setFileRead(false);
493     systems.add(activeMolecularAssembly);
494 
495     List<String> conects = new ArrayList<>();
496     List<String> links = new ArrayList<>();
497     List<String> ssbonds = new ArrayList<>();
498     List<String> structs = new ArrayList<>();
499     try {
500       for (File file : files) {
501         currentFile = file;
502         if (mutate) {
503           List<Character> chainIDs = new ArrayList<>();
504           try (BufferedReader br = new BufferedReader(new FileReader(file))) {
505             String line = br.readLine();
506             while (line != null) {
507               // Replace all tabs w/ 4x spaces
508               line = line.replaceAll("\t", "    ");
509               String identity = line;
510               if (line.length() > 6) {
511                 identity = line.substring(0, 6);
512               }
513               identity = identity.trim().toUpperCase();
514               Record record;
515               try {
516                 record = Record.valueOf(identity);
517               } catch (Exception e) {
518                 // Continue until the record is recognized.
519                 line = br.readLine();
520                 continue;
521               }
522               switch (record) {
523                 case ANISOU, HETATM, ATOM -> {
524                   char c22 = line.charAt(21);
525                   boolean idFound = false;
526                   for (Character chainID : chainIDs) {
527                     if (c22 == chainID) {
528                       idFound = true;
529                       break;
530                     }
531                   }
532                   if (!idFound) {
533                     chainIDs.add(c22);
534                   }
535                 }
536               }
537               line = br.readLine();
538             }
539             for (Mutation mtn : mutations) {
540               if (!chainIDs.contains(mtn.chainChar)) {
541                 if (chainIDs.size() == 1) {
542                   logger.warning(
543                           format(" Chain ID %c for mutation not found: only one chain %c found.",
544                                   mtn.chainChar, chainIDs.get(0)));
545                 } else {
546                   logger.warning(
547                           format(" Chain ID %c for mutation not found: mutation will not proceed.",
548                                   mtn.chainChar));
549                 }
550               }
551             }
552           } catch (IOException ioException) {
553             logger.fine(format(" Exception %s in parsing file to find chain IDs", ioException));
554           }
555         }
556 
557         // Check that the current file exists and that we can read it.
558         if (currentFile == null || !currentFile.exists() || !currentFile.canRead()) {
559           return false;
560         }
561 
562         // Open the current file for parsing.
563         try (BufferedReader br = new BufferedReader(new FileReader(currentFile))) {
564           // Echo the alternate location being parsed.
565           if (currentAltLoc == 'A') {
566             logger.info(format(" Reading %s", currentFile.getName()));
567           } else {
568             logger.info(format(" Reading %s alternate location %s", currentFile.getName(), currentAltLoc));
569 
570           }
571           activeMolecularAssembly.setAlternateLocation(currentAltLoc);
572 
573           // Reset the current chain and segID.
574           currentChainID = null;
575           currentSegID = null;
576           boolean containsInsCode = false;
577 
578           // Read the first line of the file.
579           String line = br.readLine();
580 
581           // Parse until END or ENDMDL is found, or to the end of the file.
582           while (line != null) {
583             // Replace all tabs w/ 4x spaces
584             line = line.replaceAll("\t", "    ");
585             String identity = line;
586             if (line.length() > 6) {
587               identity = line.substring(0, 6);
588             }
589             identity = identity.trim().toUpperCase();
590             Record record;
591             try {
592               record = Record.valueOf(identity);
593             } catch (Exception e) {
594               // Continue until the record is recognized.
595               line = br.readLine();
596               continue;
597             }
598 
599             // Switch on the known record.
600             switch (record) {
601               case ENDMDL:
602               case END:
603                 // Setting "line" to null will exit the loop.
604                 line = null;
605                 continue;
606               case DBREF:
607 // =============================================================================
608 //  1 -  6       Record name   "DBREF "
609 //  8 - 11       IDcode        idCode             ID code of this entry.
610 // 13            Character     chainID            Chain identifier.
611 // 15 - 18       Integer       seqBegin           Initial sequence number of the
612 //                                                PDB sequence segment.
613 // 19            AChar         insertBegin        Initial insertion code of the
614 //                                                PDB sequence segment.
615 // 21 - 24       Integer       seqEnd             Ending sequence number of the
616 //                                                PDB sequence segment.
617 // 25            AChar         insertEnd          Ending insertion code of the
618 //                                                PDB sequence segment.
619 // 27 - 32       LString       database           Sequence database name.
620 // 34 - 41       LString       dbAccession        Sequence database accession code.
621 // 43 - 54       LString       dbIdCode           Sequence  database identification code.
622 // 56 - 60       Integer       dbseqBegin         Initial sequence number of the
623 //                                                database seqment.
624 // 61            AChar         idbnsBeg           Insertion code of initial residue of the
625 //                                                segment, if PDB is the reference.
626 // 63 - 67       Integer       dbseqEnd           Ending sequence number of the
627 //                                                database segment.
628 // 68            AChar         dbinsEnd           Insertion code of the ending residue of
629 //                                                the segment, if PDB is the reference.
630 // =============================================================================
631                 Character chainID = line.substring(12, 13).toUpperCase().charAt(0);
632                 int seqBegin = parseInt(line.substring(14, 18).trim());
633                 int seqEnd = parseInt(line.substring(20, 24).trim());
634                 int[] seqRange = dbRef.computeIfAbsent(chainID, k -> new int[2]);
635                 seqRange[0] = seqBegin;
636                 seqRange[1] = seqEnd;
637                 break;
638               case SEQRES:
639 // =============================================================================
640 //  1 -  6        Record name    "SEQRES"
641 //  8 - 10        Integer        serNum       Serial number of the SEQRES record for the
642 //                                            current  chain. Starts at 1 and increments
643 //                                            by one  each line. Reset to 1 for each chain.
644 // 12             Character      chainID      Chain identifier. This may be any single
645 //                                            legal  character, including a blank which is
646 //                                            is used if there is only one chain.
647 // 14 - 17        Integer        numRes       Number of residues in the chain.
648 //                                            This  value is repeated on every record.
649 // 20 - 22        Residue name   resName      Residue name.
650 // 24 - 26        Residue name   resName      Residue name.
651 // 28 - 30        Residue name   resName      Residue name.
652 // 32 - 34        Residue name   resName      Residue name.
653 // 36 - 38        Residue name   resName      Residue name.
654 // 40 - 42        Residue name   resName      Residue name.
655 // 44 - 46        Residue name   resName      Residue name.
656 // 48 - 50        Residue name   resName      Residue name.
657 // 52 - 54        Residue name   resName      Residue name.
658 // 56 - 58        Residue name   resName      Residue name.
659 // 60 - 62        Residue name   resName      Residue name.
660 // 64 - 66        Residue name   resName      Residue name.
661 // 68 - 70        Residue name   resName      Residue name.
662 // =============================================================================
663                 activeMolecularAssembly.addHeaderLine(line);
664                 chainID = line.substring(11, 12).toUpperCase().charAt(0);
665                 int serNum = parseInt(line.substring(7, 10).trim());
666                 String[] chain = seqRes.get(chainID);
667                 int numRes = parseInt(line.substring(13, 17).trim());
668                 if (chain == null) {
669                   chain = new String[numRes];
670                   seqRes.put(chainID, chain);
671                 }
672                 int resID = (serNum - 1) * 13;
673                 int end = line.length();
674                 for (int start = 19; start + 3 <= end; start += 4) {
675                   String res = line.substring(start, start + 3).trim();
676                   if (res.isEmpty()) {
677                     break;
678                   }
679                   chain[resID++] = res;
680                 }
681                 break;
682               case MODRES:
683                 String modResName = line.substring(12, 15).trim();
684                 String stdName = line.substring(24, 27).trim();
685                 modRes.put(modResName.toUpperCase(), stdName.toUpperCase());
686                 activeMolecularAssembly.addHeaderLine(line);
687 // =============================================================================
688 //  1 -  6        Record name     "MODRES"
689 //  8 - 11        IDcode          idCode         ID code of this entry.
690 // 13 - 15        Residue name    resName        Residue name used in this entry.
691 // 17             Character       chainID        Chain identifier.
692 // 19 - 22        Integer         seqNum         Sequence number.
693 // 23             AChar           iCode          Insertion code.
694 // 25 - 27        Residue name    stdRes         Standard residue name.
695 // 30 - 70        String          comment        Description of the residue modification.
696 // =============================================================================
697                 break;
698               case ANISOU:
699 // =============================================================================
700 //  1 - 6        Record name   "ANISOU"
701 //  7 - 11       Integer       serial         Atom serial number.
702 // 13 - 16       Atom          name           Atom name.
703 // 17            Character     altLoc         Alternate location indicator
704 // 18 - 20       Residue name  resName        Residue name.
705 // 22            Character     chainID        Chain identifier.
706 // 23 - 26       Integer       resSeq         Residue sequence number.
707 // 27            AChar         iCode          Insertion code.
708 // 29 - 35       Integer       u[0][0]        U(1,1)
709 // 36 - 42       Integer       u[1][1]        U(2,2)
710 // 43 - 49       Integer       u[2][2]        U(3,3)
711 // 50 - 56       Integer       u[0][1]        U(1,2)
712 // 57 - 63       Integer       u[0][2]        U(1,3)
713 // 64 - 70       Integer       u[1][2]        U(2,3)
714 // 77 - 78       LString(2)    element        Element symbol, right-justified.
715 // 79 - 80       LString(2)    charge         Charge on the atom.
716 // =============================================================================
717                 boolean deleteAnisou = properties.getBoolean("delete-anisou", false);
718                 double resetBfactors = properties.getDouble("reset-bfactors", -1.0);
719                 if (deleteAnisou || resetBfactors >= 0.0) {
720                   break;
721                 }
722                 Integer serial = Hybrid36.decode(5, line.substring(6, 11));
723                 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
724                 if (!altLocs.contains(altLoc)) {
725                   altLocs.add(altLoc);
726                 }
727                 if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
728                   break;
729                 }
730                 double[] adp = new double[6];
731                 adp[0] = parseInt(line.substring(28, 35).trim()) * 1.0e-4;
732                 adp[1] = parseInt(line.substring(35, 42).trim()) * 1.0e-4;
733                 adp[2] = parseInt(line.substring(42, 49).trim()) * 1.0e-4;
734                 adp[3] = parseInt(line.substring(49, 56).trim()) * 1.0e-4;
735                 adp[4] = parseInt(line.substring(56, 63).trim()) * 1.0e-4;
736                 adp[5] = parseInt(line.substring(63, 70).trim()) * 1.0e-4;
737                 if (atoms.containsKey(serial)) {
738                   Atom a = atoms.get(serial);
739                   a.setAltLoc(altLoc);
740                   a.setAnisou(adp);
741                 } else {
742                   logger.info(
743                           format(" No ATOM record for ANISOU serial number %d has been found.", serial));
744                   logger.info(format(" This ANISOU record will be ignored:\n %s", line));
745                 }
746                 break;
747               case ATOM:
748 // =============================================================================
749 //  1 -  6        Record name   "ATOM  "
750 //  7 - 11        Integer       serial       Atom serial number.
751 // 13 - 16        Atom          name         Atom name.
752 // 17             Character     altLoc       Alternate location indicator.
753 // 18 - 20        Residue name  resName      Residue name.
754 // 22             Character     chainID      Chain identifier.
755 // 23 - 26        Integer       resSeq       Residue sequence number.
756 // 27             AChar         iCode        Code for insertion of residues.
757 // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
758 // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
759 // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
760 // 55 - 60        Real(6.2)     occupancy    Occupancy.
761 // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
762 // 77 - 78        LString(2)    element      Element symbol, right-justified.
763 // 79 - 80        LString(2)    charge       Charge  on the atom.
764 // =============================================================================
765                 String name;
766                 String resName;
767                 String segID;
768                 int resSeq;
769                 boolean printAtom;
770                 double[] d;
771                 double occupancy;
772                 double tempFactor;
773                 Atom newAtom;
774                 Atom returnedAtom;
775                 // If it's a misnamed water, it will fall through to HETATM.
776                 if (!line.substring(17, 20).trim().equals("HOH")) {
777                   serial = Hybrid36.decode(5, line.substring(6, 11));
778                   name = line.substring(12, 16).trim();
779                   if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
780                           || name.toUpperCase().contains("3H")) {
781                     // VERSION3_2 is presently just a placeholder for "anything non-standard".
782                     fileStandard = VERSION3_2;
783                   }
784                   altLoc = line.substring(16, 17).toUpperCase().charAt(0);
785                   if (!altLocs.contains(altLoc)) {
786                     altLocs.add(altLoc);
787                   }
788 
789                   if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
790                     break;
791                   }
792 
793                   resName = line.substring(17, 20).trim();
794                   chainID = line.substring(21, 22).charAt(0);
795                   segID = getSegID(chainID);
796                   resSeq = Hybrid36.decode(4, line.substring(22, 26));
797 
798                   char insertionCode = line.charAt(26);
799                   if (insertionCode != ' ' && !containsInsCode) {
800                     containsInsCode = true;
801                     logger.warning(
802                             " FFX support for files with " + "insertion codes is experimental. "
803                                     + "Residues will be renumbered to " + "eliminate insertion codes (52A "
804                                     + "becomes 53, 53 becomes 54, etc)");
805                   }
806 
807                   int offset = insertionCodeCount.getOrDefault(chainID, 0);
808                   String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
809                   if (!pdbToNewResMap.containsKey(pdbResNum)) {
810                     if (insertionCode != ' ') {
811                       ++offset;
812                       insertionCodeCount.put(chainID, offset);
813                     }
814                     resSeq += offset;
815                     if (offset != 0) {
816                       logger.info(
817                               format(" Chain %c " + "residue %s-%s renumbered to %c %s-%d", chainID,
818                                       pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
819                     }
820                     String newNum = format("%c%d", chainID, resSeq);
821                     pdbToNewResMap.put(pdbResNum, newNum);
822                   } else {
823                     resSeq += offset;
824                   }
825 
826                   printAtom = false;
827                   if (mutate) {
828                     boolean doBreak = false;
829                     for (Mutation mtn : mutations) {
830                       if (chainID == mtn.chainChar && resSeq == mtn.resID) {
831                         mtn.origResName = resName;
832                         resName = mtn.resName;
833                         String atomName = name.toUpperCase();
834 
835                         int isAA = AminoAcidUtils.getAminoAcidNumber(resName);
836                         int isNA = NucleicAcidUtils.getNucleicAcidNumber(resName);
837 
838                         if ((isNA != -1 && naBackboneNames.contains(atomName)) || (isAA != -1 && backboneNames.contains(atomName))) {
839                           printAtom = true;
840                         } else {
841                           // grab pur-pur or pyr-pyr alchem. atoms
842                           ArrayList<String> alchAtoms = mtn.getAlchemicalAtoms(false);
843                           if (alchAtoms == null) {
844                             // test to see if atom is involved in glycosyl torsion and if needs renaming (pyr-pur/pur-pyr)
845                             String newName = mtn.isNonAlchemicalAtom(atomName);
846                             if (newName != null) { // if not null -- use name it
847                               printAtom = true;
848                               if (newName.startsWith("~")) { // switch from purine to pyrmidine or v.v.
849                                 // switch name and include it as an alchemical atom
850                                 name = newName.substring(1);
851                                 logger.info(format(" DELETING atom %d %s of %s %d in chain %s", serial, atomName, resName, resSeq, chainID));
852                               } else {
853                                 // replace name but do not include as an alchemical atom
854                                 name = newName;
855                               }
856                               doBreak = false;
857                             } else if (!atomName.contains("'")) {
858                               logger.info(format(" DELETING atom %d %s of %s %d in chain %s", serial, atomName, resName, resSeq, chainID));
859                               doBreak = true;
860                             } else {
861                               printAtom = true;
862                               doBreak = false;
863                             }
864                           } else {
865                             if (alchAtoms.contains(atomName) && !atomName.contains("'")) {
866                               logger.info(format(" DELETING atom %d %s of %s %d in chain %s", serial, atomName, resName, resSeq, chainID));
867                               doBreak = true;
868                             } else {
869                               printAtom = true;
870                               doBreak = false;
871                             }
872                           }
873                           break;
874                         }
875                       }
876                     }
877                     if (doBreak) {
878                       break;
879                     }
880                   }
881 
882                   if (constantPH) {
883                     AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
884                     if (constantPHResidueMap.containsKey(aa3)) {
885                       String atomName = name.toUpperCase();
886                       AminoAcid3 aa3PH = constantPHResidueMap.get(aa3);
887                       resName = aa3PH.name();
888                       if (constantPhBackboneNames.contains(atomName)) {
889                         logger.info(format(" %s-%d %s", resName, resSeq, atomName));
890                       } else if (!atomName.startsWith("H")) {
891                         logger.info(format(" %s-%d %s", resName, resSeq, atomName));
892                       } else {
893                         logger.info(format(" %s-%d %s skipped", resName, resSeq, atomName));
894                         break;
895                       }
896                     }
897                   } else if (rotamerTitration) {
898                     AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
899                     if (rotamerResidueMap.containsKey(aa3) && resNumberList.contains(resSeq)) {
900                       AminoAcid3 aa3rotamer = rotamerResidueMap.get(aa3);
901                       resName = aa3rotamer.name();
902                     }
903                   }
904                   d = new double[3];
905                   d[0] = parseDouble(line.substring(30, 38).trim());
906                   d[1] = parseDouble(line.substring(38, 46).trim());
907                   d[2] = parseDouble(line.substring(46, 54).trim());
908                   occupancy = 1.0;
909                   tempFactor = 1.0;
910                   try {
911                     occupancy = parseDouble(line.substring(54, 60).trim());
912                     tempFactor = parseDouble(line.substring(60, 66).trim());
913                   } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
914                     // Use default values.
915                     if (printMissingFields) {
916                       logger.info(" Missing occupancy and b-factors set to 1.0.");
917                       printMissingFields = false;
918                     } else if (logger.isLoggable(Level.FINE)) {
919                       logger.fine(" Missing occupancy and b-factors set to 1.0.");
920                     }
921                   }
922 
923                   double bfactor = properties.getDouble("reset-bfactors", -1.0);
924                   if (bfactor >= 0.0) {
925                     tempFactor = bfactor;
926                   }
927 
928                   newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
929 
930                   // Check if this is a modified residue.
931                   if (modRes.containsKey(resName.toUpperCase())) {
932                     newAtom.setModRes(true);
933                   }
934                   returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
935                   if (returnedAtom != newAtom) {
936                     // A previously added atom has been retained.
937                     atoms.put(serial, returnedAtom);
938                     if (logger.isLoggable(Level.FINE)) {
939                       logger.fine(returnedAtom + " has been retained over\n" + newAtom);
940                     }
941                   } else {
942                     // The new atom has been added.
943                     atoms.put(serial, newAtom);
944                     // Check if the newAtom took the xyzIndex of a previous alternate conformer.
945                     if (newAtom.getIndex() == 0) {
946                       newAtom.setXyzIndex(xyzIndex++);
947                     }
948                     if (printAtom) {
949                       logger.info(newAtom.toString());
950                     }
951                   }
952                   break;
953                 }
954                 break;
955               case HETATM:
956 // =============================================================================
957 //  1 - 6        Record name    "HETATM"
958 //  7 - 11       Integer        serial        Atom serial number.
959 // 13 - 16       Atom           name          Atom name.
960 // 17            Character      altLoc        Alternate location indicator.
961 // 18 - 20       Residue name   resName       Residue name.
962 // 22            Character      chainID       Chain identifier.
963 // 23 - 26       Integer        resSeq        Residue sequence number.
964 // 27            AChar          iCode         Code for insertion of residues.
965 // 31 - 38       Real(8.3)      x             Orthogonal coordinates for X.
966 // 39 - 46       Real(8.3)      y             Orthogonal coordinates for Y.
967 // 47 - 54       Real(8.3)      z             Orthogonal coordinates for Z.
968 // 55 - 60       Real(6.2)      occupancy     Occupancy.
969 // 61 - 66       Real(6.2)      tempFactor    Temperature factor.
970 // 77 - 78       LString(2)     element       Element symbol; right-justified.
971 // 79 - 80       LString(2)     charge        Charge on the atom.
972 // =============================================================================
973                 serial = Hybrid36.decode(5, line.substring(6, 11));
974                 name = line.substring(12, 16).trim();
975                 altLoc = line.substring(16, 17).toUpperCase().charAt(0);
976                 if (!altLocs.contains(altLoc)) {
977                   altLocs.add(altLoc);
978                 }
979                 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
980                   break;
981                 }
982                 // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
983                 //  break;
984                 //}
985                 resName = line.substring(17, 20).trim();
986                 chainID = line.substring(21, 22).charAt(0);
987                 segID = getSegID(chainID);
988                 resSeq = Hybrid36.decode(4, line.substring(22, 26));
989 
990                 char insertionCode = line.charAt(26);
991                 if (insertionCode != ' ' && !containsInsCode) {
992                   containsInsCode = true;
993                   logger.warning(" FFX support for files with " + "insertion codes is experimental. "
994                           + "Residues will be renumbered to " + "eliminate insertion codes (52A "
995                           + "becomes 53, 53 becomes 54, etc)");
996                 }
997 
998                 int offset = insertionCodeCount.getOrDefault(chainID, 0);
999                 String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
1000                 if (!pdbToNewResMap.containsKey(pdbResNum)) {
1001                   if (insertionCode != ' ') {
1002                     ++offset;
1003                     insertionCodeCount.put(chainID, offset);
1004                   }
1005                   resSeq += offset;
1006                   if (offset != 0) {
1007                     logger.info(
1008                             format(" Chain %c " + "molecule %s-%s renumbered to %c %s-%d", chainID,
1009                                     pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
1010                   }
1011                   String newNum = format("%c%d", chainID, resSeq);
1012                   pdbToNewResMap.put(pdbResNum, newNum);
1013                 } else {
1014                   resSeq += offset;
1015                 }
1016 
1017                 d = new double[3];
1018                 d[0] = parseDouble(line.substring(30, 38).trim());
1019                 d[1] = parseDouble(line.substring(38, 46).trim());
1020                 d[2] = parseDouble(line.substring(46, 54).trim());
1021                 occupancy = 1.0;
1022                 tempFactor = 1.0;
1023                 try {
1024                   occupancy = parseDouble(line.substring(54, 60).trim());
1025                   tempFactor = parseDouble(line.substring(60, 66).trim());
1026                 } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
1027                   // Use default values.
1028                   if (printMissingFields) {
1029                     logger.info(" Missing occupancy and b-factors set to 1.0.");
1030                     printMissingFields = false;
1031                   } else if (logger.isLoggable(Level.FINE)) {
1032                     logger.fine(" Missing occupancy and b-factors set to 1.0.");
1033                   }
1034                 }
1035 
1036                 double bfactor = properties.getDouble("reset-bfactors", -1.0);
1037                 if (bfactor >= 0.0) {
1038                   tempFactor = bfactor;
1039                 }
1040 
1041                 newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
1042                 newAtom.setHetero(true);
1043                 // Check if this is a modified residue.
1044                 if (modRes.containsKey(resName.toUpperCase())) {
1045                   newAtom.setModRes(true);
1046                 }
1047                 returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
1048                 if (returnedAtom != newAtom) {
1049                   // A previously added atom has been retained.
1050                   atoms.put(serial, returnedAtom);
1051                   if (logger.isLoggable(Level.FINE)) {
1052                     logger.fine(returnedAtom + " has been retained over\n" + newAtom);
1053                   }
1054                 } else {
1055                   // The new atom has been added.
1056                   atoms.put(serial, newAtom);
1057                   newAtom.setXyzIndex(xyzIndex++);
1058                 }
1059                 break;
1060               case CRYST1:
1061 // =============================================================================
1062 // The CRYST1 record presents the unit cell parameters, space group, and Z
1063 // value. If the structure was not determined by crystallographic means, CRYST1
1064 // simply provides the unitary values, with an appropriate REMARK.
1065 //
1066 //  7 - 15       Real(9.3)     a              a (Angstroms).
1067 // 16 - 24       Real(9.3)     b              b (Angstroms).
1068 // 25 - 33       Real(9.3)     c              c (Angstroms).
1069 // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1070 // 41 - 47       Real(7.2)     beta           beta (degrees).
1071 // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1072 // 56 - 66       LString       sGroup         Space  group.
1073 // 67 - 70       Integer       z              Z value.
1074 // =============================================================================
1075                 if (line.length() < 55) {
1076                   logger.severe(" CRYST1 record is improperly formatted.");
1077                 }
1078                 double aaxis = parseDouble(line.substring(6, 15).trim());
1079                 double baxis = parseDouble(line.substring(15, 24).trim());
1080                 double caxis = parseDouble(line.substring(24, 33).trim());
1081                 double alpha = parseDouble(line.substring(33, 40).trim());
1082                 double beta = parseDouble(line.substring(40, 47).trim());
1083                 double gamma = parseDouble(line.substring(47, 54).trim());
1084                 int limit = min(line.length(), 66);
1085                 String sg = line.substring(55, limit).trim();
1086                 properties.addProperty("a-axis", aaxis);
1087                 properties.addProperty("b-axis", baxis);
1088                 properties.addProperty("c-axis", caxis);
1089                 properties.addProperty("alpha", alpha);
1090                 properties.addProperty("beta", beta);
1091                 properties.addProperty("gamma", gamma);
1092                 properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1093                 break;
1094               case CONECT:
1095 // =============================================================================
1096 //  7 - 11        Integer        serial       Atom  serial number
1097 // 12 - 16        Integer        serial       Serial number of bonded atom
1098 // 17 - 21        Integer        serial       Serial number of bonded atom
1099 // 22 - 26        Integer        serial       Serial number of bonded atom
1100 // 27 - 31        Integer        serial       Serial number of bonded atom
1101 //
1102 // CONECT records involving atoms for which the coordinates are not present
1103 // in the entry (e.g., symmetry-generated) are not given.
1104 // CONECT records involving atoms for which the coordinates are missing due
1105 // to disorder, are also not provided.
1106 // =============================================================================
1107                 conects.add(line);
1108                 break;
1109               case LINK:
1110 // =============================================================================
1111 // The LINK records specify connectivity between residues that is not implied by
1112 // the primary structure. Connectivity is expressed in terms of the atom names.
1113 // They also include the distance associated with each linkage following the
1114 // symmetry operations at the end of each record.
1115 // 13 - 16         Atom           name1           Atom name.
1116 // 17              Character      altLoc1         Alternate location indicator.
1117 // 18 - 20         Residue name   resName1        Residue  name.
1118 // 22              Character      chainID1        Chain identifier.
1119 // 23 - 26         Integer        resSeq1         Residue sequence number.
1120 // 27              AChar          iCode1          Insertion code.
1121 // 43 - 46         Atom           name2           Atom name.
1122 // 47              Character      altLoc2         Alternate location indicator.
1123 // 48 - 50         Residue name   resName2        Residue name.
1124 // 52              Character      chainID2        Chain identifier.
1125 // 53 - 56         Integer        resSeq2         Residue sequence number.
1126 // 57              AChar          iCode2          Insertion code.
1127 // 60 - 65         SymOP          sym1            Symmetry operator atom 1.
1128 // 67 - 72         SymOP          sym2            Symmetry operator atom 2.
1129 // 74 – 78         Real(5.2)      Length          Link distance
1130 // =============================================================================
1131                 char a1 = line.charAt(16);
1132                 char a2 = line.charAt(46);
1133                 if (a1 != a2) {
1134                   // logger.info(format(" Ignoring LINK record as alternate locations do not match\n
1135                   // %s.", line));
1136                   break;
1137                 }
1138                 if (currentAltLoc == 'A') {
1139                   if ((a1 == ' ' || a1 == 'A') && (a2 == ' ' || a2 == 'A')) {
1140                     links.add(line);
1141                   }
1142                 } else if (a1 == currentAltLoc && a2 == currentAltLoc) {
1143                   links.add(line);
1144                 }
1145                 break;
1146               case SSBOND:
1147 // =============================================================================
1148 // The SSBOND record identifies each disulfide bond in protein and polypeptide
1149 // structures by identifying the two residues involved in the bond.
1150 // The disulfide bond distance is included after the symmetry operations at
1151 // the end of the SSBOND record.
1152 //
1153 //  8 - 10        Integer         serNum       Serial number.
1154 // 12 - 14        LString(3)      "CYS"        Residue name.
1155 // 16             Character       chainID1     Chain identifier.
1156 // 18 - 21        Integer         seqNum1      Residue sequence number.
1157 // 22             AChar           icode1       Insertion code.
1158 // 26 - 28        LString(3)      "CYS"        Residue name.
1159 // 30             Character       chainID2     Chain identifier.
1160 // 32 - 35        Integer         seqNum2      Residue sequence number.
1161 // 36             AChar           icode2       Insertion code.
1162 // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
1163 // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
1164 // 74 – 78        Real(5.2)      Length        Disulfide bond distance
1165 //
1166 // If SG of cysteine is disordered then there are possible alternate linkages.
1167 // wwPDB practice is to put together all possible SSBOND records. This is
1168 // problematic because the alternate location identifier is not specified in
1169 // the SSBOND record.
1170 //
1171 // Notes:
1172 // SSBOND records may be invalid if chain IDs are reused.
1173 // SSBOND records are applied by FFX to all conformers.
1174 // =============================================================================
1175                 ssbonds.add(line);
1176                 break;
1177               case HELIX:
1178 // =============================================================================
1179 // HELIX records are used to identify the position of helices in the molecule.
1180 // Helices are named, numbered, and classified by type. The residues where the
1181 // helix begins and ends are noted, as well as the total length.
1182 //
1183 //  8 - 10        Integer        serNum        Serial number of the helix. This starts
1184 //                                             at 1  and increases incrementally.
1185 // 12 - 14        LString(3)     helixID       Helix  identifier. In addition to a serial
1186 //                                             number, each helix is given an
1187 //                                             alphanumeric character helix identifier.
1188 // 16 - 18        Residue name   initResName   Name of the initial residue.
1189 // 20             Character      initChainID   Chain identifier for the chain containing
1190 //                                             this  helix.
1191 // 22 - 25        Integer        initSeqNum    Sequence number of the initial residue.
1192 // 26             AChar          initICode     Insertion code of the initial residue.
1193 // 28 - 30        Residue  name  endResName    Name of the terminal residue of the helix.
1194 // 32             Character      endChainID    Chain identifier for the chain containing
1195 //                                             this  helix.
1196 // 34 - 37        Integer        endSeqNum     Sequence number of the terminal residue.
1197 // 38             AChar          endICode      Insertion code of the terminal residue.
1198 // 39 - 40        Integer        helixClass    Helix class (see below).
1199 // 41 - 70        String         comment       Comment about this helix.
1200 // 72 - 76        Integer        length        Length of this helix.
1201 //
1202 //                                      CLASS NUMBER
1203 // TYPE OF  HELIX                     (COLUMNS 39 - 40)
1204 // --------------------------------------------------------------
1205 // Right-handed alpha (default)                1
1206 // Right-handed omega                          2
1207 // Right-handed pi                             3
1208 // Right-handed gamma                          4
1209 // Right-handed 3 - 10                         5
1210 // Left-handed alpha                           6
1211 // Left-handed omega                           7
1212 // Left-handed gamma                           8
1213 // 2 - 7 ribbon/helix                          9
1214 // Polyproline                                10
1215 // =============================================================================
1216               case SHEET:
1217 // =============================================================================
1218 // SHEET records are used to identify the position of sheets in the molecule.
1219 // Sheets are both named and numbered. The residues where the sheet begins and
1220 // ends are noted.
1221 //
1222 //  8 - 10        Integer       strand         Strand  number which starts at 1 for each
1223 //                                             strand within a sheet and increases by one.
1224 // 12 - 14        LString(3)    sheetID        Sheet  identifier.
1225 // 15 - 16        Integer       numStrands     Number  of strands in sheet.
1226 // 18 - 20        Residue name  initResName    Residue  name of initial residue.
1227 // 22             Character     initChainID    Chain identifier of initial residue in strand.
1228 // 23 - 26        Integer       initSeqNum     Sequence number of initial residue in strand.
1229 // 27             AChar         initICode      Insertion code of initial residue in  strand.
1230 // 29 - 31        Residue name  endResName     Residue name of terminal residue.
1231 // 33             Character     endChainID     Chain identifier of terminal residue.
1232 // 34 - 37        Integer       endSeqNum      Sequence number of terminal residue.
1233 // 38             AChar         endICode       Insertion code of terminal residue.
1234 // 39 - 40        Integer       sense          Sense of strand with respect to previous
1235 //                                             strand in the sheet. 0 if first strand,
1236 //                                             1 if  parallel,and -1 if anti-parallel.
1237 // 42 - 45        Atom          curAtom        Registration.  Atom name in current strand.
1238 // 46 - 48        Residue name  curResName     Registration.  Residue name in current strand
1239 // 50             Character     curChainId     Registration. Chain identifier in current strand.
1240 // 51 - 54        Integer       curResSeq      Registration.  Residue sequence number
1241 //                                             in current strand.
1242 // 55             AChar         curICode       Registration. Insertion code in current strand.
1243 // 57 - 60        Atom          prevAtom       Registration.  Atom name in previous strand.
1244 // 61 - 63        Residue name  prevResName    Registration.  Residue name in previous strand.
1245 // 65             Character     prevChainId    Registration.  Chain identifier in previous strand.
1246 // 66 - 69        Integer       prevResSeq     Registration. Residue sequence number
1247 //                                             in previous strand.
1248 // 70             AChar         prevICode      Registration.  Insertion code in
1249 //                                             previous strand.
1250 // =============================================================================
1251                 structs.add(line);
1252                 break;
1253               case MODEL: // Currently, no handling in initial read.
1254                 break;
1255               case MTRIX1:
1256 // ================================================================================
1257 // MTRIXn (n = 1, 2, or 3) records present transformations expressing
1258 // non-crystallographic symmetry.
1259 // MTRIXn will appear only when such transformations are required to generate an
1260 // entire asymmetric unit,
1261 // such as a large viral structure.
1262 //
1263 //  8 - 10        Integer       serial         Serial number.
1264 // 11 - 20        Real(10.6)    m[n][1]        Mn1
1265 // 21 - 30        Real(10.6)    m[n][2]        Mn2
1266 // 31 - 40        Real(10.6)    m[n][3]        Mn3
1267 // 21 - 30        Real(10.6)    v[n]           Vn
1268 // 60             Integer       iGiven         1 if coordinates for the representations which are
1269 //                                              approximately related by the transformations of the
1270 //                                              molecule are contained in the entry. Otherwise, blank.
1271 // =================================================================================
1272                 StringBuilder MTRX1 = new StringBuilder(line.substring(11, 55));
1273                 properties.addProperty("MTRIX1", MTRX1);
1274                 break;
1275               case MTRIX2:
1276                 StringBuilder MTRX2 = new StringBuilder(line.substring(11, 55));
1277                 properties.addProperty("MTRIX2", MTRX2);
1278                 break;
1279               case MTRIX3:
1280                 StringBuilder MTRX3 = new StringBuilder(line.substring(11, 55));
1281                 properties.addProperty("MTRIX3", MTRX3);
1282                 break;
1283               case REMARK:
1284                 remarkLines.add(line.trim());
1285                 if (line.contains("Lambda:")) {
1286                   Matcher m = lambdaPattern.matcher(line);
1287                   if (m.find()) {
1288                     lastReadLambda = Double.parseDouble(m.group(1));
1289                   }
1290                 }
1291 // =================================================================================
1292 // REMARK 350: presents all transformations, both crystallographic and non-crystallographic,
1293 // needed to generate the biomolecule. These transformations operate on the coordinates in the
1294 // entry. Both author and computational descriptions of assemblies are provided, if applicable.
1295 // For strict ncs case where more than one assembly presents in asymmetric unit, only one
1296 // chain with unit matrix will reported in REMARK 350, the other chain will be generated
1297 // by rotation and translation.
1298 //
1299 // 20 - 23        Integer       serial         Serial number.
1300 // 24 - 33        Real(10.6)    m[n][1]        Mn1
1301 // 34 - 43        Real(10.6)    m[n][2]        Mn2
1302 // 44 - 53        Real(10.6)    m[n][3]        Mn3
1303 // 59 - 68        Real(10.6)    v[n]           Vn
1304 // =================================================================================
1305                 if (line.length() >= 68) {
1306                   String remarkType = line.substring(7, 10).trim();
1307                   if (remarkType.matches("\\d+") && parseInt(remarkType) == 350 && line.substring(13,
1308                           18).equalsIgnoreCase("BIOMT")) {
1309                     properties.addProperty("BIOMTn", new StringBuilder(line.substring(24, 68)));
1310                   }
1311                 }
1312                 break;
1313               default:
1314                 break;
1315             }
1316             line = br.readLine();
1317           }
1318 
1319         } catch (FileNotFoundException fileNotFoundException) {
1320           logger.log(Level.SEVERE, " PDB file not found", fileNotFoundException);
1321         }
1322       }
1323       xyzIndex--;
1324       setFileRead(true);
1325     } catch (IOException e) {
1326       logger.exiting(PDBFilter.class.getName(), "readFile", e);
1327       return false;
1328     }
1329 
1330     // Locate disulfide bonds; bond parameters are assigned below.
1331     List<Bond> ssBondList = locateDisulfideBonds(ssbonds, activeMolecularAssembly, pdbToNewResMap);
1332 
1333     // Record the number of atoms read in from the PDB file before applying
1334     // algorithms that may build new atoms.
1335     int pdbAtoms = activeMolecularAssembly.getAtomArray().length;
1336     removeExcessHydrogens();
1337     // Build missing backbone atoms in loops.
1338     buildMissingResidues(xyzIndex, activeMolecularAssembly, seqRes, dbRef);
1339 
1340     // Assign atom types. Missing side-chains atoms and missing hydrogen will be built in.
1341     bondList = assignAtomTypes(activeMolecularAssembly, fileStandard);
1342 
1343     // Assign disulfide bonds parameters and log their creation.
1344     buildDisulfideBonds(ssBondList, activeMolecularAssembly, bondList);
1345 
1346     // Finally, re-number the atoms if missing atoms were created.
1347     int currentN = activeMolecularAssembly.getAtomArray().length;
1348     boolean renumber = forceField.getBoolean("renumber-pdb", false);
1349     if (pdbAtoms != currentN) {
1350       logger.info(format(" Renumbering PDB file due to built atoms (%d vs %d)", currentN, pdbAtoms));
1351       numberAtoms(activeMolecularAssembly);
1352     } else if (renumber) {
1353       logger.info(" Renumbering PDB file due to renumber-pdb flag.");
1354       numberAtoms(activeMolecularAssembly);
1355     }
1356     return true;
1357   }
1358 
1359   public void removeExcessHydrogens(){
1360     logger.info(" Removing excess Hydrogens");
1361     for(Residue residue: activeMolecularAssembly.getResidueList()){
1362       if(residue.getName().equals("ACE") || residue.getName().equals("NME") || residue.getResidueType() != Residue.ResidueType.AA){
1363         break;
1364       }
1365       String trueResName = residue.getAtomByName("CA", true).getResidueName();
1366       Atom atom;
1367       switch (trueResName) {
1368         case "HID", "GLU" -> {
1369           // No HE2
1370           atom = residue.getAtomByName("HE2", true);
1371         }
1372         case "HIE" -> {
1373           // No HD1
1374           atom = residue.getAtomByName("HD1", true);
1375         }
1376         case "ASP" -> {
1377           // No HD2
1378           atom = residue.getAtomByName("HD2", true);
1379         }
1380         case "LYD" -> {
1381           // No HZ3
1382           atom = residue.getAtomByName("HZ3", true);
1383         }
1384         case "CYD" -> {
1385           // No HG
1386           atom = residue.getAtomByName("HG", true);
1387         }
1388         default -> {
1389           atom = null;
1390         }
1391         // Do nothing.
1392       }
1393       if(atom != null){
1394         int index = activeMolecularAssembly.getResidueList().indexOf(residue);
1395         MSNode atoms = residue.getAtomNode();
1396         atoms.remove(atom);
1397         residue.setName(trueResName);
1398         activeMolecularAssembly.getResidueList().set(index, residue);
1399       }
1400     }
1401   }
1402 
1403   /** {@inheritDoc} */
1404   @Override
1405   public boolean readNext() {
1406     return readNext(false);
1407   }
1408 
1409   /** {@inheritDoc} */
1410   @Override
1411   public boolean readNext(boolean resetPosition) {
1412     return readNext(resetPosition, false);
1413   }
1414 
1415   /** {@inheritDoc} */
1416   @Override
1417   public boolean readNext(boolean resetPosition, boolean print) {
1418     return readNext(resetPosition, print, true);
1419   }
1420 
1421   /** {@inheritDoc} */
1422   @Override
1423   @SuppressWarnings("fallthrough")
1424   public boolean readNext(boolean resetPosition, boolean print, boolean parse) {
1425     modelsRead = resetPosition ? 1 : modelsRead + 1;
1426     if (!parse) {
1427       if (print) {
1428         logger.info(format(" Skipped Model %d.", modelsRead));
1429       }
1430       return true;
1431     }
1432     remarkLines = new ArrayList<>(remarkLines.size());
1433     // ^ is beginning of line, \\s+ means "one or more whitespace", (\\d+) means match and capture
1434     // one or more digits.
1435     Pattern modelPatt = Pattern.compile("^MODEL\\s+(\\d+)");
1436     boolean eof = true;
1437     for (MolecularAssembly system : systems) {
1438       try {
1439         BufferedReader currentReader;
1440         if (readers.containsKey(system)) {
1441           currentReader = readers.get(system);
1442           try {
1443             if (!currentReader.ready()) {
1444               currentReader = new BufferedReader(new FileReader(readFile));
1445               // Mark the start of the file.
1446               currentReader.mark(0);
1447               readers.remove(system);
1448               readers.put(system, currentReader);
1449             } else if (resetPosition) {
1450               // If the BufferedReader has been opened, and reset is requested, reset the position.
1451               currentReader.reset();
1452             }
1453           } catch (Exception exception) {
1454             // If all structures in the PDB file have been read, the currentReader may have closed.
1455             // The try block will catch this case and reset to the beginning of the file.
1456             currentReader = new BufferedReader(new FileReader(readFile));
1457             // Mark the start of the file.
1458             currentReader.mark(0);
1459             readers.remove(system);
1460             readers.put(system, currentReader);
1461           }
1462         } else {
1463           currentReader = new BufferedReader(new FileReader(readFile));
1464           // Mark the start of the file.
1465           currentReader.mark(0);
1466           readers.put(system, currentReader);
1467         }
1468 
1469         // Skip to appropriate model.
1470         String line = currentReader.readLine();
1471         while (line != null) {
1472           line = line.trim();
1473           Matcher m = modelPatt.matcher(line);
1474           if (m.find()) {
1475             int modelNum = parseInt(m.group(1));
1476             if (modelNum == modelsRead) {
1477               if (print) {
1478                 logger.log(Level.INFO, format(" Reading model %d for %s", modelNum, currentFile));
1479               }
1480               eof = false;
1481               break;
1482             }
1483           }
1484           line = currentReader.readLine();
1485         }
1486         if (eof) {
1487           if (logger.isLoggable(Level.FINEST)) {
1488             logger.log(Level.FINEST, format("\n End of file reached for %s", readFile));
1489           }
1490           currentReader.close();
1491           return false;
1492         }
1493 
1494         // Begin parsing the model.
1495         currentChainID = null;
1496         currentSegID = null;
1497         boolean modelDone = false;
1498         line = currentReader.readLine();
1499         while (line != null) {
1500           line = line.trim();
1501           String recID = line.substring(0, Math.min(6, line.length())).trim();
1502           try {
1503             Record record = Record.valueOf(recID);
1504             boolean hetatm = true;
1505             switch (record) {
1506               // =============================================================================
1507               //
1508               //  7 - 11        Integer       serial       Atom serial number.
1509               // 13 - 16        Atom          name         Atom name.
1510               // 17             Character     altLoc       Alternate location indicator.
1511               // 18 - 20        Residue name  resName      Residue name.
1512               // 22             Character     chainID      Chain identifier.
1513               // 23 - 26        Integer       resSeq       Residue sequence number.
1514               // 27             AChar         iCode        Code for insertion of residues.
1515               // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in
1516               // Angstroms.
1517               // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in
1518               // Angstroms.
1519               // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in
1520               // Angstroms.
1521               // 55 - 60        Real(6.2)     occupancy    Occupancy.
1522               // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
1523               // 77 - 78        LString(2)    element      Element symbol, right-justified.
1524               // 79 - 80        LString(2)    charge       Charge  on the atom.
1525               // =============================================================================
1526               //         1         2         3         4         5         6         7
1527               // 123456789012345678901234567890123456789012345678901234567890123456789012345678
1528               // ATOM      1  N   ILE A  16      60.614  71.140 -10.592  1.00  7.38           N
1529               // ATOM      2  CA  ILE A  16      60.793  72.149  -9.511  1.00  6.91           C
1530               case ATOM:
1531                 hetatm = false;
1532               case HETATM:
1533                 String name = line.substring(12, 16).trim();
1534                 if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
1535                         || name.toUpperCase().contains("3H")) {
1536                   // VERSION3_2 is presently just a placeholder for "anything non-standard".
1537                   fileStandard = VERSION3_2;
1538                 }
1539                 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
1540                 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
1541                   break;
1542                 }
1543                 // if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
1544                 //  break;
1545                 // }
1546                 String resName = line.substring(17, 20).trim();
1547                 Character chainID = line.substring(21, 22).charAt(0);
1548                 String segID = getExistingSegID(chainID);
1549                 int resSeq = Hybrid36.decode(4, line.substring(22, 26));
1550                 double[] d = new double[3];
1551                 d[0] = parseDouble(line.substring(30, 38).trim());
1552                 d[1] = parseDouble(line.substring(38, 46).trim());
1553                 d[2] = parseDouble(line.substring(46, 54).trim());
1554                 double occupancy = 1.0;
1555                 double tempFactor = 1.0;
1556                 Atom newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy,
1557                         tempFactor, segID);
1558                 newAtom.setHetero(hetatm);
1559                 // Check if this is a modified residue.
1560                 if (modRes.containsKey(resName.toUpperCase())) {
1561                   newAtom.setModRes(true);
1562                 }
1563 
1564                 Atom returnedAtom = activeMolecularAssembly.findAtom(newAtom);
1565                 if (returnedAtom != null) {
1566                   returnedAtom.setXYZ(d);
1567                   double[] retXYZ = new double[3];
1568                   returnedAtom.getXYZ(retXYZ);
1569                 } else {
1570                   String message = format(" Could not find atom %s in assembly", newAtom);
1571                   if (dieOnMissingAtom) {
1572                     logger.severe(message);
1573                   } else {
1574                     logger.warning(message);
1575                   }
1576                 }
1577                 break;
1578               case CRYST1:
1579                 // =============================================================================
1580                 // The CRYST1 record presents the unit cell parameters, space group, and Z
1581                 // value. If the structure was not determined by crystallographic means, CRYST1
1582                 // simply provides the unitary values, with an appropriate REMARK.
1583                 //
1584                 //  7 - 15       Real(9.3)     a              a (Angstroms).
1585                 // 16 - 24       Real(9.3)     b              b (Angstroms).
1586                 // 25 - 33       Real(9.3)     c              c (Angstroms).
1587                 // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1588                 // 41 - 47       Real(7.2)     beta           beta (degrees).
1589                 // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1590                 // 56 - 66       LString       sGroup         Space  group.
1591                 // 67 - 70       Integer       z              Z value.
1592                 // =============================================================================
1593                 logger.fine(" Crystal record found.");
1594                 if (line.length() < 55) {
1595                   logger.severe(" CRYST1 record is improperly formatted.");
1596                 }
1597                 double aaxis = parseDouble(line.substring(6, 15).trim());
1598                 double baxis = parseDouble(line.substring(15, 24).trim());
1599                 double caxis = parseDouble(line.substring(24, 33).trim());
1600                 double alpha = parseDouble(line.substring(33, 40).trim());
1601                 double beta = parseDouble(line.substring(40, 47).trim());
1602                 double gamma = parseDouble(line.substring(47, 54).trim());
1603                 int limit = min(line.length(), 66);
1604                 String sg = line.substring(55, limit).trim();
1605 //                properties.clearProperty("a-axis");
1606 //                properties.clearProperty("b-axis");
1607 //                properties.clearProperty("c-axis");
1608 //                properties.clearProperty("alpha");
1609 //                properties.clearProperty("beta");
1610 //                properties.clearProperty("gamma");
1611 //                properties.clearProperty("spacegroup");
1612 //
1613 //                properties.addProperty("a-axis", aaxis);
1614 //                properties.addProperty("b-axis", baxis);
1615 //                properties.addProperty("c-axis", caxis);
1616 //                properties.addProperty("alpha", alpha);
1617 //                properties.addProperty("beta", beta);
1618 //                properties.addProperty("gamma", gamma);
1619 //                properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1620                 Crystal crystal = activeMolecularAssembly.getCrystal();
1621                 SpaceGroup spaceGroup = SpaceGroupDefinitions.spaceGroupFactory(sg);
1622                 if (Objects.equals(crystal.spaceGroup.shortName, spaceGroup.shortName)) {
1623                   crystal.changeUnitCellParameters(aaxis, baxis, caxis, alpha, beta, gamma);
1624                 } else {
1625                   // TODO: Handle changes in space groups... Means recalculating force field terms.
1626                   logger.warning(format(" Original space group %s could not be changed to %s",
1627                           crystal.spaceGroup.shortName, spaceGroup.shortName));
1628                 }
1629                 break;
1630               case ENDMDL:
1631               case END: // Technically speaking, END should be at the end of the file, not end of
1632                 // the model.
1633                 logger.log(Level.FINE, format(" Model %d successfully read", modelsRead));
1634                 modelDone = true;
1635                 break;
1636               case REMARK:
1637                 remarkLines.add(line.trim());
1638                 if (line.contains("Lambda:")) {
1639                   Matcher m = lambdaPattern.matcher(line);
1640                   if (m.find()) {
1641                     lastReadLambda = Double.parseDouble(m.group(1));
1642                   }
1643                 }
1644                 break;
1645               default:
1646                 break;
1647             }
1648           } catch (Exception ex) {
1649             // Do nothing; it's not an ATOM/HETATM line.
1650           }
1651           if (modelDone) {
1652             break;
1653           }
1654           line = currentReader.readLine();
1655         }
1656         return true;
1657       } catch (IOException ex) {
1658         logger.info(
1659                 format(" Exception in parsing frame %d of %s:" + " %s", modelsRead, system.toString(),
1660                         ex));
1661       }
1662     }
1663     return false;
1664   }
1665 
1666   /**
1667    * Specify the alternate location.
1668    *
1669    * @param molecularAssembly The MolecularAssembly to populate.
1670    * @param altLoc The alternate location to use.
1671    */
1672   public void setAltID(MolecularAssembly molecularAssembly, Character altLoc) {
1673     setMolecularSystem(molecularAssembly);
1674     currentAltLoc = altLoc;
1675   }
1676 
1677   /**
1678    * Sets whether this PDBFilter should log each time it saves to a file.
1679    *
1680    * @param logWrites a boolean.
1681    */
1682   public void setLogWrites(boolean logWrites) {
1683     this.logWrites = logWrites;
1684   }
1685 
1686   /**
1687    * setModelNumbering.
1688    *
1689    * @param modelsWritten the number of models written.
1690    */
1691   public void setModelNumbering(int modelsWritten) {
1692     this.modelsWritten = modelsWritten;
1693   }
1694 
1695   public void setLMN(int[] lmn) {
1696     if(lmn[0] >= 1 && lmn[1] >= 1 && lmn[2] >= 1){
1697       this.lmn = lmn;
1698     }else{
1699       // Provided dimensions are not handled. Revert to P1.
1700       this.lmn = new int[]{1,1,1};
1701     }
1702   }
1703 
1704   /**
1705    * setSymOp.
1706    *
1707    * @param symOp a int.
1708    */
1709   public void setSymOp(int symOp) {
1710     this.nSymOp = symOp;
1711   }
1712 
1713   /**
1714    * Expand the current system to P1 during the save operation.
1715    *
1716    * @param file The file to write.
1717    * @return Return true on a successful write.
1718    */
1719   public boolean writeFileAsP1(File file) {
1720     // XYZ File First Line
1721     final int l = lmn[0];
1722     final int m = lmn[1];
1723     final int n = lmn[2];
1724     final int numReplicates = l * m * n;
1725     Crystal crystal = activeMolecularAssembly.getCrystal();
1726     int nSymOps = crystal.getUnitCell().spaceGroup.getNumberOfSymOps();
1727 
1728     if (nSymOps == 1 && l <= 1 && m <= 1 && n <= 1) {
1729       // This is a P1 system.
1730       if (!writeFile(file, false)) {
1731         logger.info(format(" Save failed for %s", activeMolecularAssembly));
1732         return false;
1733       } else {
1734         return true;
1735       }
1736     } else {
1737       Polymer[] polymers = activeMolecularAssembly.getChains();
1738       int chainCount = 0;
1739       for (Polymer polymer : polymers) {
1740         Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1741         polymer.setChainID(chainID);
1742         polymer.setSegID(chainID.toString());
1743       }
1744       nSymOp = 0;
1745       logWrites = false;
1746       boolean writeEnd = false;
1747       if (!writeFile(file, false, false, writeEnd)) {
1748         logger.info(format(" Save failed for %s", activeMolecularAssembly));
1749         return false;
1750       } else {
1751         for (int i = 0; i < l; i++) {
1752           for (int j = 0; j < m; j++) {
1753             for (int k = 0; k < n; k++) {
1754               lValue = i;
1755               mValue = j;
1756               nValue = k;
1757               for (int iSym = 0; iSym < nSymOps; iSym++) {
1758                 nSymOp = iSym;
1759                 for (Polymer polymer : polymers) {
1760                   Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1761                   polymer.setChainID(chainID);
1762                   polymer.setSegID(chainID.toString());
1763                 }
1764                 // If the last sym op to be written.
1765                 writeEnd = iSym == nSymOps - 1 && i == l - 1 && j == m - 1 && k == n - 1;
1766                 if (!writeFile(file, true, false, writeEnd)) {
1767                   logger.info(format(" Save failed for %s", activeMolecularAssembly));
1768                   return false;
1769                 }
1770               }
1771             }
1772           }
1773         }
1774       }
1775 
1776       // Reset the chainIDs.
1777       chainCount = 0;
1778       for (Polymer polymer : polymers) {
1779         Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1780         polymer.setChainID(chainID);
1781         polymer.setSegID(chainID.toString());
1782       }
1783 
1784     }
1785 
1786     return true;
1787   }
1788 
1789   /**
1790    * writeFile
1791    *
1792    * @param saveFile a {@link java.io.File} object.
1793    * @param append Whether to append to saveFile (vs over-write).
1794    * @param printLinear Ignored (remains to present a different method signature).
1795    * @param writeEnd True if this is the final model.
1796    * @return Success of writing.
1797    */
1798   public boolean writeFile(File saveFile, boolean append, boolean printLinear, boolean writeEnd) {
1799     return writeFile(saveFile, append, Collections.emptySet(), writeEnd, true);
1800   }
1801 
1802   /**
1803    * writeFile
1804    *
1805    * @param saveFile a {@link java.io.File} object to save to.
1806    * @param append Whether to append to saveFile (vs over-write).
1807    * @param toExclude A {@link java.util.Set} of {@link ffx.potential.bonded.Atom}s to exclude
1808    *     from writing.
1809    * @param writeEnd True if this is the final model.
1810    * @param versioning True if the file being saved to should be versioned. False if the file
1811    *     being saved to should be overwritten.
1812    * @return Success of writing.
1813    */
1814   public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1815                            boolean versioning) {
1816     return writeFile(saveFile, append, toExclude, writeEnd, versioning, null);
1817   }
1818 
1819   /**
1820    * writeFile
1821    *
1822    * @param saveFile a {@link java.io.File} object to save to.
1823    * @param append Whether to append to saveFile (vs over-write).
1824    * @param toExclude A {@link java.util.Set} of {@link ffx.potential.bonded.Atom}s to exclude
1825    *     from writing.
1826    * @param writeEnd True if this is the final model.
1827    * @param versioning True if the file being saved to should be versioned. False if the file
1828    *     being saved to should be overwritten.
1829    * @param extraLines Extra comment/header lines to write.
1830    * @return Success of writing.
1831    */
1832   public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1833                            boolean versioning, String[] extraLines) {
1834     // Set standardize atom names to false in the presence of deuterium
1835     List<Atom> deuteriumAtoms = new ArrayList<>();
1836     for(Atom atom: activeMolecularAssembly.getAtomArray()){
1837       if(atom.getName().startsWith("D")){
1838         String name = atom.getName().replace("D","H");
1839         atom.setName(name);
1840         deuteriumAtoms.add(atom);
1841       }
1842     }
1843     if (standardizeAtomNames) {
1844       logger.info(" Setting atom names to PDB standard.");
1845       renameAtomsToPDBStandard(activeMolecularAssembly);
1846     }
1847 
1848     for(Atom atom: activeMolecularAssembly.getAtomArray()){
1849       if(deuteriumAtoms.contains(atom) && atom.getName().startsWith("H")){
1850         String name = atom.getName().replace("H","D");
1851         atom.setName(name);
1852       }
1853     }
1854     final Set<Atom> atomExclusions = toExclude == null ? Collections.emptySet() : toExclude;
1855     if (saveFile == null) {
1856       return false;
1857     }
1858     if (vdwH) {
1859       logger.info(" Saving hydrogen to van der Waals centers instead of nuclear locations.");
1860     }
1861     if (nSymOp > -1) {
1862       logger.info(format(" Saving atoms using the symmetry operator:\n%s\n",
1863               activeMolecularAssembly.getCrystal().getUnitCell().spaceGroup.getSymOp(nSymOp)
1864                       .toString()));
1865     }
1866 
1867     // Create StringBuilders for ATOM, ANISOU and TER records that can be reused.
1868     StringBuilder sb = new StringBuilder("ATOM  ");
1869     StringBuilder anisouSB = new StringBuilder("ANISOU");
1870     StringBuilder terSB = new StringBuilder("TER   ");
1871     StringBuilder model = null;
1872     for (int i = 6; i < 80; i++) {
1873       sb.append(' ');
1874       anisouSB.append(' ');
1875       terSB.append(' ');
1876     }
1877 
1878     File newFile = saveFile;
1879     if (!append) {
1880       if (versioning) {
1881         newFile = version(saveFile);
1882       }
1883     } else if (modelsWritten >= 0) {
1884       model = new StringBuilder(format("MODEL     %-4d", ++modelsWritten));
1885       model.append(repeat(" ", 65));
1886     }
1887     activeMolecularAssembly.setFile(newFile);
1888     if (activeMolecularAssembly.getName() == null) {
1889       activeMolecularAssembly.setName(newFile.getName());
1890     }
1891     if (logWrites) {
1892       logger.log(Level.INFO, " Saving {0}", newFile.getName());
1893     }
1894 
1895     try (FileWriter fw = new FileWriter(newFile, append);
1896          BufferedWriter bw = new BufferedWriter(fw)) {
1897       /*
1898        Will come before CRYST1 and ATOM records, but after anything
1899        written by writeFileWithHeader (particularly X-ray refinement statistics).
1900       */
1901       String[] headerLines = activeMolecularAssembly.getHeaderLines();
1902       for (String line : headerLines) {
1903         bw.write(format("%s\n", line));
1904       }
1905       if (extraLines != null) {
1906         if (rotamerTitration && extraLines[0].contains("REMARK")) {
1907           for (String line : extraLines) {
1908             bw.write(line + "\n");
1909           }
1910         } else {
1911           for (String line : extraLines) {
1912             bw.write(format("REMARK 999 %s\n", line));
1913           }
1914         }
1915       }
1916       if (model != null) {
1917         bw.write(model.toString());
1918         bw.newLine();
1919       }
1920       // =============================================================================
1921       // The CRYST1 record presents the unit cell parameters, space group, and Z
1922       // value. If the structure was not determined by crystallographic means, CRYST1
1923       // simply provides the unitary values, with an appropriate REMARK.
1924       //
1925       //  7 - 15       Real(9.3)     a              a (Angstroms).
1926       // 16 - 24       Real(9.3)     b              b (Angstroms).
1927       // 25 - 33       Real(9.3)     c              c (Angstroms).
1928       // 34 - 40       Real(7.2)     alpha          alpha (degrees).
1929       // 41 - 47       Real(7.2)     beta           beta (degrees).
1930       // 48 - 54       Real(7.2)     gamma          gamma (degrees).
1931       // 56 - 66       LString       sGroup         Space  group.
1932       // 67 - 70       Integer       z              Z value.
1933       // =============================================================================
1934       if (nSymOp < 0) {
1935         // Write out the unit cell.
1936         Crystal crystal = activeMolecularAssembly.getCrystal();
1937         if (crystal != null && !crystal.aperiodic()) {
1938           Crystal c = crystal.getUnitCell();
1939           if (lmn[0] > 0 || lmn[1] > 0 || lmn[2] > 0) {
1940             c.a = c.a * lmn[0];
1941             c.b = c.b * lmn[1];
1942             c.c = c.c * lmn[2];
1943           }
1944           bw.write(c.toCRYST1());
1945         }
1946       } else if (nSymOp == 0) {
1947         // Write a P1 cell.
1948         Crystal crystal = activeMolecularAssembly.getCrystal();
1949         if (crystal != null && !crystal.aperiodic()) {
1950           Crystal c = crystal.getUnitCell();
1951           Crystal p1 = new Crystal((lmn[0]>0)? c.a * lmn[0] : c.a, (lmn[1]>0)? c.b * lmn[1] : c.b, (lmn[2]>0)? c.c * lmn[2] : c.c, c.alpha, c.beta, c.gamma, "P1");
1952           bw.write(p1.toCRYST1());
1953         }
1954       }
1955       // =============================================================================
1956       // The SSBOND record identifies each disulfide bond in protein and polypeptide
1957       // structures by identifying the two residues involved in the bond.
1958       // The disulfide bond distance is included after the symmetry operations at
1959       // the end of the SSBOND record.
1960       //
1961       //  8 - 10        Integer         serNum       Serial number.
1962       // 12 - 14        LString(3)      "CYS"        Residue name.
1963       // 16             Character       chainID1     Chain identifier.
1964       // 18 - 21        Integer         seqNum1      Residue sequence number.
1965       // 22             AChar           icode1       Insertion code.
1966       // 26 - 28        LString(3)      "CYS"        Residue name.
1967       // 30             Character       chainID2     Chain identifier.
1968       // 32 - 35        Integer         seqNum2      Residue sequence number.
1969       // 36             AChar           icode2       Insertion code.
1970       // 60 - 65        SymOP           sym1         Symmetry oper for 1st resid
1971       // 67 - 72        SymOP           sym2         Symmetry oper for 2nd resid
1972       // 74 – 78        Real(5.2)      Length        Disulfide bond distance
1973       //
1974       // If SG of cysteine is disordered then there are possible alternate linkages.
1975       // wwPDB practice is to put together all possible SSBOND records. This is
1976       // problematic because the alternate location identifier is not specified in
1977       // the SSBOND record.
1978       // =============================================================================
1979       int serNum = 1;
1980       Polymer[] polymers = activeMolecularAssembly.getChains();
1981       if (polymers != null) {
1982         for (Polymer polymer : polymers) {
1983           List<Residue> residues = polymer.getResidues();
1984           for (Residue residue : residues) {
1985             if (residue.getName().equalsIgnoreCase("CYS")) {
1986               List<Atom> cysAtoms = residue.getAtomList().stream()
1987                       .filter(a -> !atomExclusions.contains(a)).toList();
1988               Atom SG1 = null;
1989               for (Atom atom : cysAtoms) {
1990                 String atName = atom.getName().toUpperCase();
1991                 if (atName.equals("SG") || atName.equals("SH")
1992                         || atom.getAtomType().atomicNumber == 16) {
1993                   SG1 = atom;
1994                   break;
1995                 }
1996               }
1997               List<Bond> bonds = SG1.getBonds();
1998               for (Bond bond : bonds) {
1999                 Atom SG2 = bond.get1_2(SG1);
2000                 if (SG2.getAtomType().atomicNumber == 16 && !atomExclusions.contains(SG2)) {
2001                   if (SG1.getIndex() < SG2.getIndex()) {
2002                     bond.energy(false);
2003                     bw.write(format("SSBOND %3d CYS %1s %4s    CYS %1s %4s %36s %5.2f\n", serNum++,
2004                             SG1.getChainID().toString(), Hybrid36.encode(4, SG1.getResidueNumber()),
2005                             SG2.getChainID().toString(), Hybrid36.encode(4, SG2.getResidueNumber()), "",
2006                             bond.getValue()));
2007                   }
2008                 }
2009               }
2010             }
2011           }
2012         }
2013       }
2014       // =============================================================================
2015       //
2016       //  7 - 11        Integer       serial       Atom serial number.
2017       // 13 - 16        Atom          name         Atom name.
2018       // 17             Character     altLoc       Alternate location indicator.
2019       // 18 - 20        Residue name  resName      Residue name.
2020       // 22             Character     chainID      Chain identifier.
2021       // 23 - 26        Integer       resSeq       Residue sequence number.
2022       // 27             AChar         iCode        Code for insertion of residues.
2023       // 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
2024       // 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
2025       // 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
2026       // 55 - 60        Real(6.2)     occupancy    Occupancy.
2027       // 61 - 66        Real(6.2)     tempFactor   Temperature factor.
2028       // 77 - 78        LString(2)    element      Element symbol, right-justified.
2029       // 79 - 80        LString(2)    charge       Charge  on the atom.
2030       // =============================================================================
2031       //         1         2         3         4         5         6         7
2032       // 123456789012345678901234567890123456789012345678901234567890123456789012345678
2033       // ATOM      1  N   ILE A  16      60.614  71.140 -10.592  1.00  7.38           N
2034       // ATOM      2  CA  ILE A  16      60.793  72.149  -9.511  1.00  6.91           C
2035       MolecularAssembly[] molecularAssemblies = this.getMolecularAssemblyArray();
2036       int serial = 1;
2037       if (nSymOp > 0) {
2038         serial = serialP1;
2039       }
2040 
2041       // Loop over biomolecular chains
2042       if (polymers != null) {
2043         for (Polymer polymer : polymers) {
2044           currentSegID = polymer.getName();
2045           currentChainID = polymer.getChainID();
2046           sb.setCharAt(21, currentChainID);
2047           // Loop over residues
2048           List<Residue> residues = polymer.getResidues();
2049           for (Residue residue : residues) {
2050             String resName = residue.getName();
2051             if (resName.length() > 3) {
2052               resName = resName.substring(0, 3);
2053             }
2054             int resID = residue.getResidueNumber();
2055             sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2056             sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2057             // Loop over atoms
2058             List<Atom> residueAtoms = residue.getAtomList().stream()
2059                     .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2060             boolean altLocFound = false;
2061             for (Atom atom : residueAtoms) {
2062               if (mutate) {
2063                 for (Mutation mtn : mutations) {
2064                   if (resID == mtn.resID) {
2065                     ArrayList<String> alchAtoms = mtn.getAlchemicalAtoms(true);
2066                     if (alchAtoms != null) {
2067                       if (residue.getBackboneAtoms().contains(atom) && alchAtoms.contains(atom.getName())) {
2068                         logger.info(format(" MUTATION atom is %d chain %s",serial, currentChainID));
2069                       }
2070                     } else {
2071                       // treating pur-pyr or pyr-pur N9/N1 & C2/C4 as alchemical
2072                       if (residue.getBackboneAtoms().contains(atom)) {
2073                         logger.info(format(" MUTATION atom is %d chain %s",serial, currentChainID));
2074                       }
2075                     }
2076                   }
2077                 }
2078               }
2079               writeAtom(atom, serial++, sb, anisouSB, bw);
2080               Character altLoc = atom.getAltLoc();
2081               if (altLoc != null && !altLoc.equals(' ')) {
2082                 altLocFound = true;
2083               }
2084             }
2085             // Write out alternate conformers
2086             if (altLocFound) {
2087               for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2088                 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2089                 Polymer altPolymer = altMolecularAssembly.getPolymer(currentChainID, currentSegID, false);
2090                 Residue altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
2091                 if (altResidue == null) {
2092                   resName = AminoAcid3.UNK.name();
2093                   altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
2094                 }
2095                 residueAtoms = altResidue.getAtomList().stream()
2096                         .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2097                 for (Atom atom : residueAtoms) {
2098                   if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2099                           .equals('A')) {
2100                     sb.replace(17, 20, padLeft(atom.getResidueName().toUpperCase(), 3));
2101                     writeAtom(atom, serial++, sb, anisouSB, bw);
2102                   }
2103                 }
2104               }
2105             }
2106           }
2107           terSB.replace(6, 11, format("%5s", Hybrid36.encode(5, serial++)));
2108           terSB.replace(12, 16, "    ");
2109           terSB.replace(16, 26, sb.substring(16, 26));
2110           bw.write(terSB.toString());
2111           bw.newLine();
2112         }
2113       }
2114       sb.replace(0, 6, "HETATM");
2115       sb.setCharAt(21, 'A');
2116 
2117       Character chainID = 'A';
2118       if (polymers != null) {
2119         chainID = polymers[0].getChainID();
2120       }
2121       activeMolecularAssembly.setChainIDAndRenumberMolecules(chainID);
2122 
2123       // Loop over molecules, ions and then water.
2124       List<MSNode> molecules = activeMolecularAssembly.getMolecules();
2125       int numMolecules = molecules.size();
2126       for (int i = 0; i < numMolecules; i++) {
2127         Molecule molecule = (Molecule) molecules.get(i);
2128         chainID = molecule.getChainID();
2129         sb.setCharAt(21, chainID);
2130         String resName = molecule.getResidueName();
2131         int resID = molecule.getResidueNumber();
2132         if (resName.length() > 3) {
2133           resName = resName.substring(0, 3);
2134         }
2135         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2136         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2137         // List<Atom> moleculeAtoms = molecule.getAtomList();
2138         List<Atom> moleculeAtoms = molecule.getAtomList().stream()
2139                 .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2140         boolean altLocFound = false;
2141         for (Atom atom : moleculeAtoms) {
2142           writeAtom(atom, serial++, sb, anisouSB, bw);
2143           Character altLoc = atom.getAltLoc();
2144           if (altLoc != null && !altLoc.equals(' ')) {
2145             altLocFound = true;
2146           }
2147         }
2148         // Write out alternate conformers
2149         if (altLocFound) {
2150           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2151             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2152             MSNode altmolecule = altMolecularAssembly.getMolecules().get(i);
2153             moleculeAtoms = altmolecule.getAtomList();
2154             for (Atom atom : moleculeAtoms) {
2155               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2156                       .equals('A')) {
2157                 writeAtom(atom, serial++, sb, anisouSB, bw);
2158               }
2159             }
2160           }
2161         }
2162       }
2163 
2164       List<MSNode> ions = activeMolecularAssembly.getIons();
2165       for (int i = 0; i < ions.size(); i++) {
2166         Molecule ion = (Molecule) ions.get(i);
2167         chainID = ion.getChainID();
2168         sb.setCharAt(21, chainID);
2169         String resName = ion.getResidueName();
2170         int resID = ion.getResidueNumber();
2171         if (resName.length() > 3) {
2172           resName = resName.substring(0, 3);
2173         }
2174         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2175         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2176         // List<Atom> ionAtoms = ion.getAtomList();
2177         List<Atom> ionAtoms = ion.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2178                 .collect(Collectors.toList());
2179         boolean altLocFound = false;
2180         for (Atom atom : ionAtoms) {
2181           writeAtom(atom, serial++, sb, anisouSB, bw);
2182           Character altLoc = atom.getAltLoc();
2183           if (altLoc != null && !altLoc.equals(' ')) {
2184             altLocFound = true;
2185           }
2186         }
2187         // Write out alternate conformers
2188         if (altLocFound) {
2189           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2190             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2191             MSNode altion = altMolecularAssembly.getIons().get(i);
2192             ionAtoms = altion.getAtomList();
2193             for (Atom atom : ionAtoms) {
2194               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2195                       .equals('A')) {
2196                 writeAtom(atom, serial++, sb, anisouSB, bw);
2197               }
2198             }
2199           }
2200         }
2201       }
2202 
2203       List<MSNode> water = activeMolecularAssembly.getWater();
2204       for (int i = 0; i < water.size(); i++) {
2205         Molecule wat = (Molecule) water.get(i);
2206         chainID = wat.getChainID();
2207         sb.setCharAt(21, chainID);
2208         String resName = wat.getResidueName();
2209         int resID = wat.getResidueNumber();
2210         if (resName.length() > 3) {
2211           resName = resName.substring(0, 3);
2212         }
2213         sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2214         sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2215         List<Atom> waterAtoms = wat.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2216                 .collect(Collectors.toList());
2217         boolean altLocFound = false;
2218         for (Atom atom : waterAtoms) {
2219           writeAtom(atom, serial++, sb, anisouSB, bw);
2220           Character altLoc = atom.getAltLoc();
2221           if (altLoc != null && !altLoc.equals(' ')) {
2222             altLocFound = true;
2223           }
2224         }
2225         // Write out alternate conformers
2226         if (altLocFound) {
2227           for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2228             MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2229             MSNode altwater = altMolecularAssembly.getWater().get(i);
2230             waterAtoms = altwater.getAtomList();
2231             for (Atom atom : waterAtoms) {
2232               if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2233                       .equals('A')) {
2234                 writeAtom(atom, serial++, sb, anisouSB, bw);
2235               }
2236             }
2237           }
2238         }
2239       }
2240 
2241       if (model != null) {
2242         bw.write("ENDMDL");
2243         bw.newLine();
2244       }
2245 
2246       if (writeEnd) {
2247         bw.write("END");
2248         bw.newLine();
2249       }
2250 
2251       if (nSymOp >= 0) {
2252         serialP1 = serial;
2253       }
2254 
2255     } catch (Exception e) {
2256       String message = "Exception writing to file: " + saveFile;
2257       logger.log(Level.WARNING, message, e);
2258       return false;
2259     }
2260     return true;
2261   }
2262 
2263   /**
2264    * {@inheritDoc}
2265    *
2266    * <p>Write out the Atomic information in PDB format.
2267    */
2268   @Override
2269   public boolean writeFile(File saveFile, boolean append) {
2270     return writeFile(saveFile, append, false, true);
2271   }
2272 
2273   public boolean writeFile(File saveFile, boolean append, String[] extraLines) {
2274     return writeFile(saveFile, append, Collections.emptySet(), false, !append, extraLines);
2275   }
2276 
2277   /**
2278    * Writes out the atomic information in PDB format.
2279    *
2280    * @param saveFile The file to save information to.
2281    * @param append True if the current data should be appended to the saveFile (as in arc
2282    *     files).
2283    * @param versioning True if the saveFile should be versioned. False if the saveFile should be
2284    *     overwritten.
2285    * @return Success of writing.
2286    */
2287   public boolean writeFile(File saveFile, boolean append, boolean versioning) {
2288     return writeFile(saveFile, append, Collections.emptySet(), true, versioning);
2289   }
2290 
2291   /**
2292    * writeFileWithHeader.
2293    *
2294    * @param saveFile a {@link java.io.File} object.
2295    * @param header a {@link java.lang.String} object.
2296    * @param append a boolean.
2297    * @return a boolean.
2298    */
2299   public boolean writeFileWithHeader(File saveFile, String header, boolean append) {
2300     // Set standardize atom names to false in the presence of deuterium
2301     List<Atom> deuteriumAtoms = new ArrayList<>();
2302     for(Atom atom: activeMolecularAssembly.getAtomArray()){
2303       if(atom.getName().startsWith("D")){
2304         String name = atom.getName().replace("D","H");
2305         atom.setName(name);
2306         deuteriumAtoms.add(atom);
2307       }
2308     }
2309     if (standardizeAtomNames) {
2310       logger.info(" Setting atom names to PDB standard.");
2311       renameAtomsToPDBStandard(activeMolecularAssembly);
2312     }
2313 
2314     for(Atom atom: activeMolecularAssembly.getAtomArray()){
2315       if(deuteriumAtoms.contains(atom) && atom.getName().startsWith("H")){
2316         String name = atom.getName().replace("H","D");
2317         atom.setName(name);
2318       }
2319     }
2320     activeMolecularAssembly.setFile(saveFile);
2321     activeMolecularAssembly.setName(saveFile.getName());
2322 
2323     try (FileWriter fw = new FileWriter(saveFile, append); BufferedWriter bw = new BufferedWriter(
2324             fw)) {
2325       bw.write(header);
2326       bw.newLine();
2327     } catch (Exception e) {
2328       String message = " Exception writing to file: " + saveFile;
2329       logger.log(Level.WARNING, message, e);
2330       return false;
2331     }
2332     if (writeFile(saveFile, true)) {
2333       logger.log(Level.INFO, " Wrote PDB to file {0}", saveFile.getPath());
2334       return true;
2335     } else {
2336       logger.log(Level.INFO, " Error writing to file {0}", saveFile.getPath());
2337       return false;
2338     }
2339   }
2340 
2341   /**
2342    * writeFileWithHeader.
2343    *
2344    * @param saveFile a {@link java.io.File} object.
2345    * @param header a {@link java.lang.String} object.
2346    * @return a boolean.
2347    */
2348   public boolean writeFileWithHeader(File saveFile, String header) {
2349     return writeFileWithHeader(saveFile, header, true);
2350   }
2351 
2352   /**
2353    * writeFileWithHeader.
2354    *
2355    * @param saveFile a {@link java.io.File} object.
2356    * @param header a {@link java.lang.StringBuilder} object.
2357    * @return a boolean.
2358    */
2359   public boolean writeFileWithHeader(File saveFile, StringBuilder header) {
2360     return writeFileWithHeader(saveFile, header.toString());
2361   }
2362 
2363   /**
2364    * Get unique SegID for possibly duplicate chain IDs.
2365    *
2366    * @param c chain ID just read.
2367    * @return a unique segID.
2368    */
2369   private String getExistingSegID(Character c) {
2370     if (c.equals(' ')) {
2371       c = 'A';
2372     }
2373 
2374     // If the chain ID has not changed, return the existing segID.
2375     if (c.equals(currentChainID)) {
2376       return currentSegID;
2377     } else {
2378       currentChainID = null;
2379     }
2380 
2381     List<String> segIDs = segidMap.get(c);
2382     if (segIDs != null) {
2383       if (segIDs.size() > 1) {
2384         if (currentSegID == null) {
2385           currentChainID = c;
2386           currentSegID = segIDs.get(0);
2387           return segIDs.get(0);
2388         } else if (currentSegID.length() == 1) {
2389           currentChainID = c;
2390           currentSegID = segIDs.get(1);
2391           return segIDs.get(1);
2392         } else if (currentSegID.length() == 2) {
2393           String s = currentSegID.substring(0,1);
2394           int num = -2;
2395           try {
2396             num = Integer.parseInt(s);
2397           } catch (NumberFormatException e) {
2398             logger.severe(" SegID of length 2 does not start with an integer.");
2399           }
2400           currentChainID = c;
2401           currentSegID = segIDs.get(num+1);
2402           return segIDs.get(num+1);
2403         } else {
2404           logger.info(" Too many repeated chains. Using single letter for segID.");
2405         }
2406       }
2407       return segIDs.get(0);
2408     } else {
2409       logger.log(Level.INFO, format(" Creating SegID for to chain %s", c));
2410       return getSegID(c);
2411     }
2412   }
2413 
2414   /**
2415    * Convert possibly duplicate chain IDs into unique segIDs.
2416    *
2417    * @param c chain ID just read.
2418    * @return a unique segID.
2419    */
2420   private String getSegID(Character c) {
2421     if (c.equals(' ')) {
2422       c = 'A';
2423     }
2424 
2425     // If the chain ID has not changed, return the existing segID.
2426     if (c.equals(currentChainID)) {
2427       return currentSegID;
2428     }
2429 
2430     // Loop through existing segIDs to find the first one that is unused.
2431     int count = 0;
2432     for (String segID : segIDs) {
2433       if (segID.endsWith(c.toString())) {
2434         count++;
2435       }
2436     }
2437 
2438     // If the count is greater than 0, then append it.
2439     String newSegID;
2440     if (count == 0) {
2441       newSegID = c.toString();
2442     } else {
2443       newSegID = count + c.toString();
2444     }
2445     segIDs.add(newSegID);
2446     currentChainID = c;
2447     currentSegID = newSegID;
2448 
2449     if (segidMap.containsKey(c)) {
2450       segidMap.get(c).add(newSegID);
2451     } else {
2452       List<String> newChainList = new ArrayList<>();
2453       newChainList.add(newSegID);
2454       segidMap.put(c, newChainList);
2455     }
2456 
2457     return newSegID;
2458   }
2459 
2460   /**
2461    * writeAtom
2462    *
2463    * @param atom a {@link ffx.potential.bonded.Atom} object.
2464    * @param serial a int.
2465    * @param sb a {@link java.lang.StringBuilder} object.
2466    * @param anisouSB a {@link java.lang.StringBuilder} object.
2467    * @param bw a {@link java.io.BufferedWriter} object.
2468    * @throws java.io.IOException if any.
2469    */
2470   private void writeAtom(Atom atom, int serial, StringBuilder sb, StringBuilder anisouSB,
2471                          BufferedWriter bw) throws IOException {
2472     String name = atom.getName();
2473     int nameLength = name.length();
2474     if (nameLength > 4) {
2475       name = name.substring(0, 4);
2476     } else if (nameLength == 1) {
2477       name = name + "  ";
2478     } else if (nameLength == 2) {
2479       if (atom.getAtomType().valence == 0) {
2480         name = name + "  ";
2481       } else {
2482         name = name + " ";
2483       }
2484     }
2485     double[] xyz = vdwH ? atom.getRedXYZ() : atom.getXYZ(null);
2486     if (nSymOp >= 0) {
2487       Crystal crystal = activeMolecularAssembly.getCrystal().getUnitCell();
2488       SymOp symOp = crystal.spaceGroup.getSymOp(nSymOp);
2489       double[] newXYZ = new double[xyz.length];
2490       crystal.applySymOp(xyz, newXYZ, symOp);
2491       if (lValue > 0 || mValue > 0 || nValue > 0) {
2492         double[] translation = new double[] {lValue, mValue, nValue};
2493         crystal.getUnitCell().toCartesianCoordinates(translation, translation);
2494         newXYZ[0] += translation[0];
2495         newXYZ[1] += translation[1];
2496         newXYZ[2] += translation[2];
2497       }
2498       xyz = newXYZ;
2499     }
2500     sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
2501     Character altLoc = atom.getAltLoc();
2502     sb.setCharAt(16, Objects.requireNonNullElse(altLoc, ' '));
2503 
2504 
2505     /*
2506      * On the following code:
2507      * #1: StringBuilder.replace will allow for longer strings, expanding the StringBuilder's length if necessary.
2508      *
2509      * #2: sb was never re-initialized, so if there was overflow,
2510      * sb would continue to be > 80 characters long, resulting in broken PDB files
2511      *
2512      * #3: It may be wiser to have XYZ coordinates result in shutdown, not
2513      * truncation of coordinates. #4: Excessive B-factors aren't much of an
2514      * issue; if the B-factor is past 999.99, that's the difference between
2515      * "density extends to Venus" and "density extends to Pluto".
2516      */
2517     StringBuilder decimals = new StringBuilder();
2518     for (int i = 0; i < 3; i++) {
2519       try {
2520         decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
2521       } catch (IllegalArgumentException ex) {
2522         String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
2523         logger.info(format(" XYZ %d coordinate %8.3f for atom %s "
2524                 + "overflowed bounds of 8.3f string specified by PDB "
2525                 + "format; truncating value to %s", i, xyz[i], atom, newValue));
2526         decimals.append(newValue);
2527       }
2528     }
2529     try {
2530       decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
2531     } catch (IllegalArgumentException ex) {
2532       logger.severe(
2533               format(" Occupancy %f for atom %s is impossible; " + "value must be between 0 and 1",
2534                       atom.getOccupancy(), atom));
2535     }
2536     try {
2537       decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
2538     } catch (IllegalArgumentException ex) {
2539       String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
2540       logger.info(format(" Atom temp factor %6.2f for atom %s overflowed "
2541                       + "bounds of 6.2f string specified by PDB format; truncating " + "value to %s",
2542               atom.getTempFactor(), atom, newValue));
2543       decimals.append(newValue);
2544     }
2545     sb.replace(30, 66, decimals.toString());
2546 
2547     name = Atom.ElementSymbol.values()[atom.getAtomicNumber() - 1].toString();
2548     name = name.toUpperCase();
2549     if (atom.isDeuterium()) {
2550       name = "D";
2551     }
2552     sb.replace(76, 78, padLeft(name, 2));
2553     sb.replace(78, 80, format("%2d", 0));
2554     bw.write(sb.toString());
2555     bw.newLine();
2556     // =============================================================================
2557     //  1 - 6        Record name   "ANISOU"
2558     //  7 - 11       Integer       serial         Atom serial number.
2559     // 13 - 16       Atom          name           Atom name.
2560     // 17            Character     altLoc         Alternate location indicator
2561     // 18 - 20       Residue name  resName        Residue name.
2562     // 22            Character     chainID        Chain identifier.
2563     // 23 - 26       Integer       resSeq         Residue sequence number.
2564     // 27            AChar         iCode          Insertion code.
2565     // 29 - 35       Integer       u[0][0]        U(1,1)
2566     // 36 - 42       Integer       u[1][1]        U(2,2)
2567     // 43 - 49       Integer       u[2][2]        U(3,3)
2568     // 50 - 56       Integer       u[0][1]        U(1,2)
2569     // 57 - 63       Integer       u[0][2]        U(1,3)
2570     // 64 - 70       Integer       u[1][2]        U(2,3)
2571     // 77 - 78       LString(2)    element        Element symbol, right-justified.
2572     // 79 - 80       LString(2)    charge         Charge on the atom.
2573     // =============================================================================
2574     double[] anisou = atom.getAnisou(null);
2575     if (anisou != null) {
2576       anisouSB.replace(6, 80, sb.substring(6, 80));
2577       anisouSB.replace(28, 70,
2578               format("%7d%7d%7d%7d%7d%7d", (int) (anisou[0] * 1e4), (int) (anisou[1] * 1e4),
2579                       (int) (anisou[2] * 1e4), (int) (anisou[3] * 1e4), (int) (anisou[4] * 1e4),
2580                       (int) (anisou[5] * 1e4)));
2581       bw.write(anisouSB.toString());
2582       bw.newLine();
2583     }
2584   }
2585 
2586   /** PDB records that are recognized. */
2587   private enum Record {
2588     ANISOU, ATOM, CONECT, CRYST1, DBREF, END, MODEL, ENDMDL, HELIX, HETATM, LINK, MTRIX1, MTRIX2, MTRIX3, MODRES, SEQRES, SHEET, SSBOND, REMARK
2589   }
2590 
2591   /** Presently, VERSION3_3 is default, and VERSION3_2 is anything non-standard. */
2592   public enum PDBFileStandard {
2593     VERSION2_3, VERSION3_0, VERSION3_1, VERSION3_2, VERSION3_3
2594   }
2595 
2596   public static class Mutation {
2597 
2598     /** Residue ID of the residue to mutate. */
2599     final int resID;
2600     /** Residue name after mutation. */
2601     final String resName;
2602     /** Character for the chain ID of the residue that will be mutated. */
2603     final char chainChar;
2604     /** Residue name before mutation. */
2605     String origResName;
2606 
2607     public Mutation(int resID, char chainChar, String newResName) {
2608       newResName = newResName.toUpperCase();
2609       if (newResName.length() != 3) {
2610         logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2611       }
2612       int isAA = AminoAcidUtils.getAminoAcidNumber(newResName);
2613       int isNA = NucleicAcidUtils.getNucleicAcidNumber(newResName);
2614       if (isAA == -1 && isNA == -1) {
2615         logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2616       }
2617       this.resID = resID;
2618       this.chainChar = chainChar;
2619       this.resName = newResName;
2620     }
2621 
2622     /**
2623      * Check to see if an atom is involved in the mutated base's glycosyl torsion. If the mutation is a switch from
2624      * purine to pyrimidine or vice versa, it will return '~name', meaning the name should be replaced but to include it
2625      * as an alchemical atom.
2626      * @param atomName atom name to check
2627      * @return new name to use if it is involved in glycosyl torsion, null otherwise
2628      */
2629     public String isNonAlchemicalAtom(String atomName) {
2630       if (isWtPurine()) {
2631         if (atomName.equals("N9")) {
2632           if (isMtnPyrimidine()) {
2633             return "~N1";
2634           }
2635           return atomName;
2636         } else if (atomName.equals("C4")) {
2637           if (isMtnPyrimidine()) {
2638             return "~C2";
2639           }
2640           return atomName;
2641         }
2642         return null;
2643       }
2644 
2645       if (isWtPyrimidine()) {
2646         if (atomName.equals("N1")) {
2647           if (isMtnPurine()) {
2648             // here
2649             return "~N9";
2650           }
2651           return atomName;
2652         } else if (atomName.equals("C2")) {
2653           if (isMtnPurine()) {
2654             // here
2655             return "~C4";
2656           }
2657           return atomName;
2658         }
2659         return null;
2660       }
2661 
2662       return null;
2663     }
2664 
2665     /**
2666      * Determines what atoms should be alchemical for a purine to purine or pyrimidine to pyrimidine mutation.
2667      * @param isWriting true if writing the pdb, false if reading the pdb
2668      * @return ArrayList of alchemical atoms, null if not a pur-pur or pyr-pyr mutation
2669      */
2670     public ArrayList<String> getAlchemicalAtoms(boolean isWriting) {
2671       // Log warning that the mutation input is the same residue and return nothing so prev. functionality is not changed
2672       if (resName.equals(origResName)) {
2673         logger.severe("Desired Mutation residue is the same as the original.");
2674         return null;
2675       }
2676 
2677       boolean purpur;
2678       if (isMtnPurine() && isWtPurine()) {
2679         purpur = true;
2680       } else if (isMtnPyrimidine() && isWtPyrimidine()) {
2681         purpur = false;
2682       } else {
2683         // Return nothing so previous functionality is not changed
2684         return null;
2685       }
2686 
2687       String res;
2688 
2689       // look at the mutation residue if writing or the original (wild type) residue if reading
2690       if (isWriting) {
2691         res = resName;
2692       } else {
2693         res = origResName;
2694       }
2695 
2696       ArrayList<String> list = new ArrayList<>();
2697       if (purpur) { // purine: either A to G or G to A
2698         if (res.equals("DAD") || res.equals("DA")) { // MTN is A
2699           list.add("N6");
2700           list.add("H61");
2701           list.add("H62");
2702           list.add("H2");
2703         } else { // MTN is G
2704           list.add("H1");
2705           list.add("N2");
2706           list.add("H21");
2707           list.add("H22");
2708           list.add("O6");
2709         }
2710       } else { // pyrimidine: either T to C or C to T
2711         if (res.equals("DTY") || res.equals("DT")) { // MTN is T
2712           list.add("H3");
2713           list.add("O4");
2714           list.add("C7");
2715           list.add("H71");
2716           list.add("H72");
2717           list.add("H73");
2718         } else { // MTN is C
2719           list.add("N4");
2720           list.add("H41");
2721           list.add("H42");
2722           list.add("H5");
2723         }
2724       }
2725       return list;
2726     }
2727 
2728     /**
2729      * Determine if the mutation residue is purine.
2730      * @return true if mutation is purine
2731      */
2732     public boolean isMtnPurine() {
2733       return resName.equals("DA") || resName.equals("DG") || resName.equals("DAD") || resName.equals("DGU");
2734     }
2735 
2736     /**
2737      * Determine if the mutation residue is pyrimidine.
2738      * @return true if mutation is pyrimidine
2739      */
2740     public boolean isMtnPyrimidine() {
2741       return resName.equals("DC") || resName.equals("DT") || resName.equals("DCY") || resName.equals("DTY");
2742     }
2743 
2744     /**
2745      * Determine if original (wild type) residue is purine.
2746      * @return true if original residue is purine
2747      */
2748     public boolean isWtPurine() {
2749       return origResName.equals("DA") || origResName.equals("DG") || origResName.equals("DAD") || origResName.equals("DGU");
2750     }
2751 
2752     /**
2753      * Determine if original (wild type) residue is pyrimidine.
2754      * @return true if original residue is pyrimidine
2755      */
2756     public boolean isWtPyrimidine() {
2757       return origResName.equals("DT") || origResName.equals("DC") || origResName.equals("DTY") || origResName.equals("DCY");
2758     }
2759   }
2760 }