1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.potential.parsers;
39
40 import static ffx.potential.bonded.BondedUtils.numberAtoms;
41 import static ffx.potential.bonded.NamingUtils.renameAtomsToPDBStandard;
42 import static ffx.potential.bonded.PolymerUtils.assignAtomTypes;
43 import static ffx.potential.bonded.PolymerUtils.buildDisulfideBonds;
44 import static ffx.potential.bonded.PolymerUtils.buildMissingResidues;
45 import static ffx.potential.bonded.PolymerUtils.locateDisulfideBonds;
46 import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_2;
47 import static ffx.potential.parsers.PDBFilter.PDBFileStandard.VERSION3_3;
48 import static ffx.utilities.StringUtils.padLeft;
49 import static java.lang.Double.parseDouble;
50 import static java.lang.Integer.parseInt;
51 import static java.lang.String.format;
52 import static org.apache.commons.lang3.StringUtils.repeat;
53 import static org.apache.commons.math3.util.FastMath.min;
54
55 import ffx.crystal.Crystal;
56 import ffx.crystal.SpaceGroup;
57 import ffx.crystal.SpaceGroupDefinitions;
58 import ffx.crystal.SpaceGroupInfo;
59 import ffx.crystal.SymOp;
60 import ffx.potential.MolecularAssembly;
61 import ffx.potential.Utilities.FileType;
62 import ffx.potential.bonded.AminoAcidUtils;
63 import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
64 import ffx.potential.bonded.Atom;
65 import ffx.potential.bonded.Bond;
66 import ffx.potential.bonded.MSNode;
67 import ffx.potential.bonded.Molecule;
68 import ffx.potential.bonded.Polymer;
69 import ffx.potential.bonded.Residue;
70 import ffx.potential.parameters.ForceField;
71 import ffx.utilities.Hybrid36;
72 import ffx.utilities.StringUtils;
73 import java.io.BufferedReader;
74 import java.io.BufferedWriter;
75 import java.io.File;
76 import java.io.FileNotFoundException;
77 import java.io.FileReader;
78 import java.io.FileWriter;
79 import java.io.IOException;
80 import java.util.ArrayList;
81 import java.util.Collections;
82 import java.util.HashMap;
83 import java.util.List;
84 import java.util.Map;
85 import java.util.Objects;
86 import java.util.OptionalDouble;
87 import java.util.Set;
88 import java.util.logging.Level;
89 import java.util.logging.Logger;
90 import java.util.regex.Matcher;
91 import java.util.regex.Pattern;
92 import java.util.stream.Collectors;
93 import org.apache.commons.configuration2.CompositeConfiguration;
94
95
96
97
98
99
100
101
102
103
104 public final class PDBFilter extends SystemFilter {
105
106 private static final Logger logger = Logger.getLogger(PDBFilter.class.getName());
107 private static final Set<String> backboneNames;
108 private static final Set<String> constantPhBackboneNames;
109
110 static {
111 String[] names = {"C", "CA", "N", "O", "OXT", "OT2"};
112 backboneNames = Set.of(names);
113
114 String[] constantPhNames = {"C", "CA", "N", "O", "OXT", "OT2", "H", "HA", "H1", "H2", "H3"};
115 constantPhBackboneNames = Set.of(constantPhNames);
116 }
117
118
119 private final Map<Character, String[]> seqRes = new HashMap<>();
120
121 private final Map<Character, int[]> dbRef = new HashMap<>();
122
123 private final List<Character> altLocs = new ArrayList<>();
124
125
126
127
128
129
130
131
132
133
134 private final List<String> segIDs = new ArrayList<>();
135
136 private final Map<Character, List<String>> segidMap = new HashMap<>();
137
138 private final Map<Character, Integer> insertionCodeCount = new HashMap<>();
139
140
141
142
143 private final Map<String, String> pdbToNewResMap = new HashMap<>();
144
145 private final Map<String, String> modRes = new HashMap<>();
146
147 private final HashMap<Integer, Atom> atoms = new HashMap<>();
148
149 private final Map<MolecularAssembly, BufferedReader> readers = new HashMap<>();
150
151 private Character currentAltLoc = 'A';
152
153 private Character currentChainID = null;
154
155 private String currentSegID = null;
156
157 private boolean mutate = false;
158 private List<Mutation> mutations = null;
159 private List<Integer> resNumberList = null;
160
161 private boolean printMissingFields = true;
162
163 private int nSymOp = -1;
164
165
166
167
168 private int serialP1 = 0;
169
170 private PDBFileStandard fileStandard = VERSION3_3;
171
172 private boolean logWrites = true;
173
174 private int modelsRead = 1;
175
176 private int modelsWritten = -1;
177
178 private int[] lmn = new int[]{1,1,1};
179
180 private int l = 0;
181
182 private int m = 0;
183
184 private int n = 0;
185 private String versionFileName;
186
187 private final File readFile;
188 private List<String> remarkLines = Collections.emptyList();
189 private double lastReadLambda = Double.NaN;
190
191
192
193
194 private boolean constantPH = false;
195
196
197
198 private boolean rotamerTitration = false;
199
200
201
202 private static final HashMap<AminoAcid3, AminoAcid3> constantPHResidueMap = new HashMap<>();
203
204 static {
205
206 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
207
208 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
209
210 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
211 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
212
213 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASD);
214 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.ASH, AminoAcidUtils.AminoAcid3.ASD);
215
216 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLD);
217 constantPHResidueMap.put(AminoAcidUtils.AminoAcid3.GLH, AminoAcidUtils.AminoAcid3.GLD);
218 }
219
220
221
222
223 private static final HashMap<AminoAcid3, AminoAcid3> rotamerResidueMap = new HashMap<>();
224
225 static {
226
227 rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.LYD, AminoAcidUtils.AminoAcid3.LYS);
228
229 rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HID, AminoAcidUtils.AminoAcid3.HIS);
230 rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.HIE, AminoAcidUtils.AminoAcid3.HIS);
231
232 rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.ASP, AminoAcidUtils.AminoAcid3.ASH);
233
234 rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.GLU, AminoAcidUtils.AminoAcid3.GLH);
235
236 rotamerResidueMap.put(AminoAcidUtils.AminoAcid3.CYD, AminoAcidUtils.AminoAcid3.CYS);
237 }
238
239
240
241
242
243
244
245
246
247
248 public PDBFilter(List<File> files, MolecularAssembly molecularAssembly, ForceField forceField,
249 CompositeConfiguration properties) {
250 super(files, molecularAssembly, forceField, properties);
251 bondList = new ArrayList<>();
252 this.fileType = FileType.PDB;
253 readFile = files.get(0);
254 }
255
256
257
258
259
260
261
262
263
264
265 public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
266 CompositeConfiguration properties) {
267 super(file, molecularAssembly, forceField, properties);
268 bondList = new ArrayList<>();
269 this.fileType = FileType.PDB;
270 readFile = file;
271 }
272
273
274
275
276
277
278
279
280
281
282 public PDBFilter(File file, List<MolecularAssembly> molecularAssemblies, ForceField forceField,
283 CompositeConfiguration properties) {
284 super(file, molecularAssemblies, forceField, properties);
285 bondList = new ArrayList<>();
286 this.fileType = FileType.PDB;
287 readFile = file;
288 }
289
290
291
292
293
294
295
296
297
298
299
300
301 public PDBFilter(File file, MolecularAssembly molecularAssembly, ForceField forceField,
302 CompositeConfiguration properties, List<Integer> resNumberList) {
303 super(file, molecularAssembly, forceField, properties);
304 bondList = new ArrayList<>();
305 this.fileType = FileType.PDB;
306 this.readFile = file;
307 this.resNumberList = resNumberList;
308
309 }
310
311
312
313
314
315
316
317
318 public static String toPDBAtomLine(Atom atom) {
319 StringBuilder sb;
320 if (atom.isHetero()) {
321 sb = new StringBuilder("HETATM");
322 } else {
323 sb = new StringBuilder("ATOM ");
324 }
325 sb.append(repeat(" ", 74));
326
327 String name = atom.getName();
328 if (name.length() > 4) {
329 name = name.substring(0, 4);
330 } else if (name.length() == 1) {
331 name = name + " ";
332 } else if (name.length() == 2) {
333 name = name + " ";
334 }
335 int serial = atom.getXyzIndex();
336 sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
337
338 Character altLoc = atom.getAltLoc();
339 if (altLoc != null) {
340 sb.setCharAt(16, altLoc);
341 } else {
342 char blankChar = ' ';
343 sb.setCharAt(16, blankChar);
344 }
345
346 String resName = atom.getResidueName();
347 sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
348
349 char chain = atom.getChainID();
350 sb.setCharAt(21, chain);
351
352 int resID = atom.getResidueNumber();
353 sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
354
355 double[] xyz = atom.getXYZ(null);
356 StringBuilder decimals = new StringBuilder();
357 for (int i = 0; i < 3; i++) {
358 try {
359 decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
360 } catch (IllegalArgumentException ex) {
361 String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
362 logger.info(
363 format(" XYZ coordinate %8.3f for atom %s overflowed PDB format and is truncated to %s.",
364 xyz[i], atom, newValue));
365 decimals.append(newValue);
366 }
367 }
368 try {
369 decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
370 } catch (IllegalArgumentException ex) {
371 logger.severe(
372 format(" Occupancy %6.2f for atom %s must be between 0 and 1.", atom.getOccupancy(),
373 atom));
374 }
375 try {
376 decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
377 } catch (IllegalArgumentException ex) {
378 String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
379 logger.info(
380 format(" B-factor %6.2f for atom %s overflowed the PDB format and is truncated to %s.",
381 atom.getTempFactor(), atom, newValue));
382 decimals.append(newValue);
383 }
384
385 sb.replace(30, 66, decimals.toString());
386 sb.replace(78, 80, format("%2d", 0));
387 sb.append("\n");
388 return sb.toString();
389 }
390
391 public void setConstantPH(boolean constantPH) {
392 this.constantPH = constantPH;
393 }
394
395 public void setRotamerTitration(boolean rotamerTitration) {
396 this.rotamerTitration = rotamerTitration;
397 }
398
399
400 public void clearSegIDs() {
401 segIDs.clear();
402 }
403
404
405 @Override
406 public void closeReader() {
407 for (MolecularAssembly system : systems) {
408 BufferedReader br = readers.get(system);
409 if (br != null) {
410 try {
411 br.close();
412 } catch (IOException ex) {
413 logger.warning(format(" Exception in closing system %s: %s", system.toString(), ex));
414 }
415 }
416 }
417 }
418
419 @Override
420 public int countNumModels() {
421 Set<File> files = systems.stream().map(MolecularAssembly::getFile).map(File::toString).distinct()
422 .map(File::new).collect(Collectors.toSet());
423
424
425 return files.parallelStream().mapToInt((File fi) -> {
426 int nModelsLocal = 0;
427 try (BufferedReader br = new BufferedReader(new FileReader(fi))) {
428 String line = br.readLine();
429 while (line != null) {
430 if (line.startsWith("MODEL")) {
431 ++nModelsLocal;
432 }
433 line = br.readLine();
434 }
435 nModelsLocal = Math.max(1, nModelsLocal);
436 } catch (IOException ex) {
437 logger.info(format(" Exception in parsing file %s: %s", fi, ex));
438 }
439 return nModelsLocal;
440 }).sum();
441 }
442
443
444
445
446
447
448 public List<Character> getAltLocs() {
449 return altLocs;
450 }
451
452
453 @Override
454 public OptionalDouble getLastReadLambda() {
455 return Double.isNaN(lastReadLambda) ? OptionalDouble.empty() : OptionalDouble.of(lastReadLambda);
456 }
457
458
459
460
461
462
463 @Override
464 public String[] getRemarkLines() {
465 int nRemarks = remarkLines.size();
466 return remarkLines.toArray(new String[nRemarks]);
467 }
468
469 @Override
470 public int getSnapshot() {
471 return modelsRead;
472 }
473
474
475
476
477
478
479
480
481 public void mutate(Character chainID, int resID, String name) {
482 logger.info(format(" Mutating chain %c residue %d to %s.", chainID, resID, name));
483 mutate = true;
484 if (mutations == null) {
485 mutations = new ArrayList<>();
486 }
487 mutations.add(new Mutation(resID, chainID, name));
488 }
489
490
491
492
493
494
495 public void mutate(List<Mutation> mutations) {
496 if (this.mutations == null) {
497 this.mutations = new ArrayList<>();
498 }
499 this.mutations.addAll(mutations);
500 }
501
502
503 @Override
504 public boolean readFile() {
505 remarkLines = new ArrayList<>();
506
507 int xyzIndex = 1;
508 setFileRead(false);
509 systems.add(activeMolecularAssembly);
510
511 List<String> conects = new ArrayList<>();
512 List<String> links = new ArrayList<>();
513 List<String> ssbonds = new ArrayList<>();
514 List<String> structs = new ArrayList<>();
515 try {
516 for (File file : files) {
517 currentFile = file;
518 if (mutate) {
519 List<Character> chainIDs = new ArrayList<>();
520 try (BufferedReader br = new BufferedReader(new FileReader(file))) {
521 String line = br.readLine();
522 while (line != null) {
523
524 line = line.replaceAll("\t", " ");
525 String identity = line;
526 if (line.length() > 6) {
527 identity = line.substring(0, 6);
528 }
529 identity = identity.trim().toUpperCase();
530 Record record;
531 try {
532 record = Record.valueOf(identity);
533 } catch (Exception e) {
534
535 line = br.readLine();
536 continue;
537 }
538 switch (record) {
539 case ANISOU, HETATM, ATOM -> {
540 char c22 = line.charAt(21);
541 boolean idFound = false;
542 for (Character chainID : chainIDs) {
543 if (c22 == chainID) {
544 idFound = true;
545 break;
546 }
547 }
548 if (!idFound) {
549 chainIDs.add(c22);
550 }
551 }
552 }
553 line = br.readLine();
554 }
555 for (Mutation mtn : mutations) {
556 if (!chainIDs.contains(mtn.chainChar)) {
557 if (chainIDs.size() == 1) {
558 logger.warning(
559 format(" Chain ID %c for mutation not found: only one chain %c found.",
560 mtn.chainChar, chainIDs.get(0)));
561 } else {
562 logger.warning(
563 format(" Chain ID %c for mutation not found: mutation will not proceed.",
564 mtn.chainChar));
565 }
566 }
567 }
568 } catch (IOException ioException) {
569 logger.fine(format(" Exception %s in parsing file to find chain IDs", ioException));
570 }
571 }
572
573
574 if (currentFile == null || !currentFile.exists() || !currentFile.canRead()) {
575 return false;
576 }
577
578
579 try (BufferedReader br = new BufferedReader(new FileReader(currentFile))) {
580
581 if (currentAltLoc == 'A') {
582 logger.info(format(" Reading %s", currentFile.getName()));
583 } else {
584 logger.info(format(" Reading %s alternate location %s", currentFile.getName(), currentAltLoc));
585
586 }
587 activeMolecularAssembly.setAlternateLocation(currentAltLoc);
588
589
590 currentChainID = null;
591 currentSegID = null;
592 boolean containsInsCode = false;
593
594
595 String line = br.readLine();
596
597
598 while (line != null) {
599
600 line = line.replaceAll("\t", " ");
601 String identity = line;
602 if (line.length() > 6) {
603 identity = line.substring(0, 6);
604 }
605 identity = identity.trim().toUpperCase();
606 Record record;
607 try {
608 record = Record.valueOf(identity);
609 } catch (Exception e) {
610
611 line = br.readLine();
612 continue;
613 }
614
615
616 switch (record) {
617 case ENDMDL:
618 case END:
619
620 line = null;
621 continue;
622 case DBREF:
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647 Character chainID = line.substring(12, 13).toUpperCase().charAt(0);
648 int seqBegin = parseInt(line.substring(14, 18).trim());
649 int seqEnd = parseInt(line.substring(20, 24).trim());
650 int[] seqRange = dbRef.computeIfAbsent(chainID, k -> new int[2]);
651 seqRange[0] = seqBegin;
652 seqRange[1] = seqEnd;
653 break;
654 case SEQRES:
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679 activeMolecularAssembly.addHeaderLine(line);
680 chainID = line.substring(11, 12).toUpperCase().charAt(0);
681 int serNum = parseInt(line.substring(7, 10).trim());
682 String[] chain = seqRes.get(chainID);
683 int numRes = parseInt(line.substring(13, 17).trim());
684 if (chain == null) {
685 chain = new String[numRes];
686 seqRes.put(chainID, chain);
687 }
688 int resID = (serNum - 1) * 13;
689 int end = line.length();
690 for (int start = 19; start + 3 <= end; start += 4) {
691 String res = line.substring(start, start + 3).trim();
692 if (res.isEmpty()) {
693 break;
694 }
695 chain[resID++] = res;
696 }
697 break;
698 case MODRES:
699 String modResName = line.substring(12, 15).trim();
700 String stdName = line.substring(24, 27).trim();
701 modRes.put(modResName.toUpperCase(), stdName.toUpperCase());
702 activeMolecularAssembly.addHeaderLine(line);
703
704
705
706
707
708
709
710
711
712
713 break;
714 case ANISOU:
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733 boolean deleteAnisou = properties.getBoolean("delete-anisou", false);
734 double resetBfactors = properties.getDouble("reset-bfactors", -1.0);
735 if (deleteAnisou || resetBfactors >= 0.0) {
736 break;
737 }
738 Integer serial = Hybrid36.decode(5, line.substring(6, 11));
739 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
740 if (!altLocs.contains(altLoc)) {
741 altLocs.add(altLoc);
742 }
743 if (!altLoc.equals(' ') && !altLoc.equals('A') && !altLoc.equals(currentAltLoc)) {
744 break;
745 }
746 double[] adp = new double[6];
747 adp[0] = parseInt(line.substring(28, 35).trim()) * 1.0e-4;
748 adp[1] = parseInt(line.substring(35, 42).trim()) * 1.0e-4;
749 adp[2] = parseInt(line.substring(42, 49).trim()) * 1.0e-4;
750 adp[3] = parseInt(line.substring(49, 56).trim()) * 1.0e-4;
751 adp[4] = parseInt(line.substring(56, 63).trim()) * 1.0e-4;
752 adp[5] = parseInt(line.substring(63, 70).trim()) * 1.0e-4;
753 if (atoms.containsKey(serial)) {
754 Atom a = atoms.get(serial);
755 a.setAltLoc(altLoc);
756 a.setAnisou(adp);
757 } else {
758 logger.info(
759 format(" No ATOM record for ANISOU serial number %d has been found.", serial));
760 logger.info(format(" This ANISOU record will be ignored:\n %s", line));
761 }
762 break;
763 case ATOM:
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781 String name;
782 String resName;
783 String segID;
784 int resSeq;
785 boolean printAtom;
786 double[] d;
787 double occupancy;
788 double tempFactor;
789 Atom newAtom;
790 Atom returnedAtom;
791
792 if (!line.substring(17, 20).trim().equals("HOH")) {
793 serial = Hybrid36.decode(5, line.substring(6, 11));
794 name = line.substring(12, 16).trim();
795 if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
796 || name.toUpperCase().contains("3H")) {
797
798 fileStandard = VERSION3_2;
799 }
800 altLoc = line.substring(16, 17).toUpperCase().charAt(0);
801 if (!altLocs.contains(altLoc)) {
802 altLocs.add(altLoc);
803 }
804 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
805 break;
806 }
807
808
809
810 resName = line.substring(17, 20).trim();
811 chainID = line.substring(21, 22).charAt(0);
812 segID = getSegID(chainID);
813 resSeq = Hybrid36.decode(4, line.substring(22, 26));
814
815 char insertionCode = line.charAt(26);
816 if (insertionCode != ' ' && !containsInsCode) {
817 containsInsCode = true;
818 logger.warning(
819 " FFX support for files with " + "insertion codes is experimental. "
820 + "Residues will be renumbered to " + "eliminate insertion codes (52A "
821 + "becomes 53, 53 becomes 54, etc)");
822 }
823
824 int offset = insertionCodeCount.getOrDefault(chainID, 0);
825 String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
826 if (!pdbToNewResMap.containsKey(pdbResNum)) {
827 if (insertionCode != ' ') {
828 ++offset;
829 insertionCodeCount.put(chainID, offset);
830 }
831 resSeq += offset;
832 if (offset != 0) {
833 logger.info(
834 format(" Chain %c " + "residue %s-%s renumbered to %c %s-%d", chainID,
835 pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
836 }
837 String newNum = format("%c%d", chainID, resSeq);
838 pdbToNewResMap.put(pdbResNum, newNum);
839 } else {
840 resSeq += offset;
841 }
842
843 printAtom = false;
844 if (mutate) {
845 boolean doBreak = false;
846 for (Mutation mtn : mutations) {
847 if (chainID == mtn.chainChar && resSeq == mtn.resID) {
848 String atomName = name.toUpperCase();
849 if (backboneNames.contains(atomName)) {
850 printAtom = true;
851 resName = mtn.resName;
852 } else {
853 logger.info(
854 format(" Deleting atom %s of %s %d", atomName, resName, resSeq));
855 doBreak = true;
856 break;
857 }
858 }
859 }
860 if (doBreak) {
861 break;
862 }
863 }
864
865 if (constantPH) {
866 AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
867 if (constantPHResidueMap.containsKey(aa3)) {
868 String atomName = name.toUpperCase();
869 AminoAcid3 aa3PH = constantPHResidueMap.get(aa3);
870 resName = aa3PH.name();
871 if (constantPhBackboneNames.contains(atomName)) {
872 logger.info(format(" %s-%d %s", resName, resSeq, atomName));
873 } else if (!atomName.startsWith("H")) {
874 logger.info(format(" %s-%d %s", resName, resSeq, atomName));
875 } else {
876 logger.info(format(" %s-%d %s skipped", resName, resSeq, atomName));
877 break;
878 }
879 }
880 } else if (rotamerTitration) {
881 AminoAcid3 aa3 = AminoAcidUtils.getAminoAcid(resName.toUpperCase());
882 if (rotamerResidueMap.containsKey(aa3) && resNumberList.contains(resSeq)) {
883 AminoAcid3 aa3rotamer = rotamerResidueMap.get(aa3);
884 resName = aa3rotamer.name();
885 }
886 }
887 d = new double[3];
888 d[0] = parseDouble(line.substring(30, 38).trim());
889 d[1] = parseDouble(line.substring(38, 46).trim());
890 d[2] = parseDouble(line.substring(46, 54).trim());
891 occupancy = 1.0;
892 tempFactor = 1.0;
893 try {
894 occupancy = parseDouble(line.substring(54, 60).trim());
895 tempFactor = parseDouble(line.substring(60, 66).trim());
896 } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
897
898 if (printMissingFields) {
899 logger.info(" Missing occupancy and b-factors set to 1.0.");
900 printMissingFields = false;
901 } else if (logger.isLoggable(Level.FINE)) {
902 logger.fine(" Missing occupancy and b-factors set to 1.0.");
903 }
904 }
905
906 double bfactor = properties.getDouble("reset-bfactors", -1.0);
907 if (bfactor >= 0.0) {
908 tempFactor = bfactor;
909 }
910
911 newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
912
913
914 if (modRes.containsKey(resName.toUpperCase())) {
915 newAtom.setModRes(true);
916 }
917 returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
918 if (returnedAtom != newAtom) {
919
920 atoms.put(serial, returnedAtom);
921 if (logger.isLoggable(Level.FINE)) {
922 logger.fine(returnedAtom + " has been retained over\n" + newAtom);
923 }
924 } else {
925
926 atoms.put(serial, newAtom);
927
928 if (newAtom.getIndex() == 0) {
929 newAtom.setXyzIndex(xyzIndex++);
930 }
931 if (printAtom) {
932 logger.info(newAtom.toString());
933 }
934 }
935 break;
936 }
937 case HETATM:
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955 serial = Hybrid36.decode(5, line.substring(6, 11));
956 name = line.substring(12, 16).trim();
957 altLoc = line.substring(16, 17).toUpperCase().charAt(0);
958 if (!altLocs.contains(altLoc)) {
959 altLocs.add(altLoc);
960 }
961 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
962 break;
963 }
964
965
966
967 resName = line.substring(17, 20).trim();
968 chainID = line.substring(21, 22).charAt(0);
969 segID = getSegID(chainID);
970 resSeq = Hybrid36.decode(4, line.substring(22, 26));
971
972 char insertionCode = line.charAt(26);
973 if (insertionCode != ' ' && !containsInsCode) {
974 containsInsCode = true;
975 logger.warning(" FFX support for files with " + "insertion codes is experimental. "
976 + "Residues will be renumbered to " + "eliminate insertion codes (52A "
977 + "becomes 53, 53 becomes 54, etc)");
978 }
979
980 int offset = insertionCodeCount.getOrDefault(chainID, 0);
981 String pdbResNum = format("%c%d%c", chainID, resSeq, insertionCode);
982 if (!pdbToNewResMap.containsKey(pdbResNum)) {
983 if (insertionCode != ' ') {
984 ++offset;
985 insertionCodeCount.put(chainID, offset);
986 }
987 resSeq += offset;
988 if (offset != 0) {
989 logger.info(
990 format(" Chain %c " + "molecule %s-%s renumbered to %c %s-%d", chainID,
991 pdbResNum.substring(1).trim(), resName, chainID, resName, resSeq));
992 }
993 String newNum = format("%c%d", chainID, resSeq);
994 pdbToNewResMap.put(pdbResNum, newNum);
995 } else {
996 resSeq += offset;
997 }
998
999 d = new double[3];
1000 d[0] = parseDouble(line.substring(30, 38).trim());
1001 d[1] = parseDouble(line.substring(38, 46).trim());
1002 d[2] = parseDouble(line.substring(46, 54).trim());
1003 occupancy = 1.0;
1004 tempFactor = 1.0;
1005 try {
1006 occupancy = parseDouble(line.substring(54, 60).trim());
1007 tempFactor = parseDouble(line.substring(60, 66).trim());
1008 } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
1009
1010 if (printMissingFields) {
1011 logger.info(" Missing occupancy and b-factors set to 1.0.");
1012 printMissingFields = false;
1013 } else if (logger.isLoggable(Level.FINE)) {
1014 logger.fine(" Missing occupancy and b-factors set to 1.0.");
1015 }
1016 }
1017
1018 double bfactor = properties.getDouble("reset-bfactors", -1.0);
1019 if (bfactor >= 0.0) {
1020 tempFactor = bfactor;
1021 }
1022
1023 newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy, tempFactor, segID);
1024 newAtom.setHetero(true);
1025
1026 if (modRes.containsKey(resName.toUpperCase())) {
1027 newAtom.setModRes(true);
1028 }
1029 returnedAtom = (Atom) activeMolecularAssembly.addMSNode(newAtom);
1030 if (returnedAtom != newAtom) {
1031
1032 atoms.put(serial, returnedAtom);
1033 if (logger.isLoggable(Level.FINE)) {
1034 logger.fine(returnedAtom + " has been retained over\n" + newAtom);
1035 }
1036 } else {
1037
1038 atoms.put(serial, newAtom);
1039 newAtom.setXyzIndex(xyzIndex++);
1040 }
1041 break;
1042 case CRYST1:
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057 if (line.length() < 55) {
1058 logger.severe(" CRYST1 record is improperly formatted.");
1059 }
1060 double aaxis = parseDouble(line.substring(6, 15).trim());
1061 double baxis = parseDouble(line.substring(15, 24).trim());
1062 double caxis = parseDouble(line.substring(24, 33).trim());
1063 double alpha = parseDouble(line.substring(33, 40).trim());
1064 double beta = parseDouble(line.substring(40, 47).trim());
1065 double gamma = parseDouble(line.substring(47, 54).trim());
1066 int limit = min(line.length(), 66);
1067 String sg = line.substring(55, limit).trim();
1068 properties.addProperty("a-axis", aaxis);
1069 properties.addProperty("b-axis", baxis);
1070 properties.addProperty("c-axis", caxis);
1071 properties.addProperty("alpha", alpha);
1072 properties.addProperty("beta", beta);
1073 properties.addProperty("gamma", gamma);
1074 properties.addProperty("spacegroup", SpaceGroupInfo.pdb2ShortName(sg));
1075 break;
1076 case CONECT:
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089 conects.add(line);
1090 break;
1091 case LINK:
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113 char a1 = line.charAt(16);
1114 char a2 = line.charAt(46);
1115 if (a1 != a2) {
1116
1117
1118 break;
1119 }
1120 if (currentAltLoc == 'A') {
1121 if ((a1 == ' ' || a1 == 'A') && (a2 == ' ' || a2 == 'A')) {
1122 links.add(line);
1123 }
1124 } else if (a1 == currentAltLoc && a2 == currentAltLoc) {
1125 links.add(line);
1126 }
1127 break;
1128 case SSBOND:
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157 ssbonds.add(line);
1158 break;
1159 case HELIX:
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198 case SHEET:
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233 structs.add(line);
1234 break;
1235 case MODEL:
1236 break;
1237 case MTRIX1:
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254 StringBuilder MTRX1 = new StringBuilder(line.substring(11, 55));
1255 properties.addProperty("MTRIX1", MTRX1);
1256 break;
1257 case MTRIX2:
1258 StringBuilder MTRX2 = new StringBuilder(line.substring(11, 55));
1259 properties.addProperty("MTRIX2", MTRX2);
1260 break;
1261 case MTRIX3:
1262 StringBuilder MTRX3 = new StringBuilder(line.substring(11, 55));
1263 properties.addProperty("MTRIX3", MTRX3);
1264 break;
1265 case REMARK:
1266 remarkLines.add(line.trim());
1267 if (line.contains("Lambda:")) {
1268 Matcher m = lambdaPattern.matcher(line);
1269 if (m.find()) {
1270 lastReadLambda = Double.parseDouble(m.group(1));
1271 }
1272 }
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287 if (line.length() >= 68) {
1288 String remarkType = line.substring(7, 10).trim();
1289 if (remarkType.matches("\\d+") && parseInt(remarkType) == 350 && line.substring(13,
1290 18).equalsIgnoreCase("BIOMT")) {
1291 properties.addProperty("BIOMTn", new StringBuilder(line.substring(24, 68)));
1292 }
1293 }
1294 break;
1295 default:
1296 break;
1297 }
1298 line = br.readLine();
1299 }
1300
1301 } catch (FileNotFoundException fileNotFoundException) {
1302 logger.log(Level.SEVERE, " PDB file not found", fileNotFoundException);
1303 }
1304 }
1305 xyzIndex--;
1306 setFileRead(true);
1307 } catch (IOException e) {
1308 logger.exiting(PDBFilter.class.getName(), "readFile", e);
1309 return false;
1310 }
1311
1312
1313 List<Bond> ssBondList = locateDisulfideBonds(ssbonds, activeMolecularAssembly, pdbToNewResMap);
1314
1315
1316
1317 int pdbAtoms = activeMolecularAssembly.getAtomArray().length;
1318
1319
1320 buildMissingResidues(xyzIndex, activeMolecularAssembly, seqRes, dbRef);
1321
1322
1323 bondList = assignAtomTypes(activeMolecularAssembly, fileStandard);
1324
1325
1326 buildDisulfideBonds(ssBondList, activeMolecularAssembly, bondList);
1327
1328
1329 int currentN = activeMolecularAssembly.getAtomArray().length;
1330 if (pdbAtoms != currentN) {
1331 logger.info(format(" Renumbering PDB file due to built atoms (%d vs %d)", currentN, pdbAtoms));
1332 numberAtoms(activeMolecularAssembly);
1333 }
1334 return true;
1335 }
1336
1337
1338 @Override
1339 public boolean readNext() {
1340 return readNext(false);
1341 }
1342
1343
1344 @Override
1345 public boolean readNext(boolean resetPosition) {
1346 return readNext(resetPosition, false);
1347 }
1348
1349
1350 @Override
1351 public boolean readNext(boolean resetPosition, boolean print) {
1352 return readNext(resetPosition, print, true);
1353 }
1354
1355
1356 @Override
1357 public boolean readNext(boolean resetPosition, boolean print, boolean parse) {
1358 modelsRead = resetPosition ? 1 : modelsRead + 1;
1359 if (!parse) {
1360 if (print) {
1361 logger.info(format(" Skipped Model %d.", modelsRead));
1362 }
1363 return true;
1364 }
1365 remarkLines = new ArrayList<>(remarkLines.size());
1366
1367
1368 Pattern modelPatt = Pattern.compile("^MODEL\\s+(\\d+)");
1369 boolean eof = true;
1370 for (MolecularAssembly system : systems) {
1371 try {
1372 BufferedReader currentReader;
1373 if (readers.containsKey(system)) {
1374 currentReader = readers.get(system);
1375 try {
1376 if (!currentReader.ready()) {
1377 currentReader = new BufferedReader(new FileReader(readFile));
1378
1379 currentReader.mark(0);
1380 readers.remove(system);
1381 readers.put(system, currentReader);
1382 } else if (resetPosition) {
1383
1384 currentReader.reset();
1385 }
1386 } catch (Exception exception) {
1387
1388
1389 currentReader = new BufferedReader(new FileReader(readFile));
1390
1391 currentReader.mark(0);
1392 readers.remove(system);
1393 readers.put(system, currentReader);
1394 }
1395 } else {
1396 currentReader = new BufferedReader(new FileReader(readFile));
1397
1398 currentReader.mark(0);
1399 readers.put(system, currentReader);
1400 }
1401
1402
1403 String line = currentReader.readLine();
1404 while (line != null) {
1405 line = line.trim();
1406 Matcher m = modelPatt.matcher(line);
1407 if (m.find()) {
1408 int modelNum = parseInt(m.group(1));
1409 if (modelNum == modelsRead) {
1410 if (print) {
1411 logger.log(Level.INFO, format(" Reading model %d for %s", modelNum, currentFile));
1412 }
1413 eof = false;
1414 break;
1415 }
1416 }
1417 line = currentReader.readLine();
1418 }
1419 if (eof) {
1420 if (logger.isLoggable(Level.FINEST)) {
1421 logger.log(Level.FINEST, format("\n End of file reached for %s", readFile));
1422 }
1423 currentReader.close();
1424 return false;
1425 }
1426
1427
1428 boolean modelDone = false;
1429 line = currentReader.readLine();
1430 while (line != null) {
1431 line = line.trim();
1432 String recID = line.substring(0, Math.min(6, line.length())).trim();
1433 try {
1434 Record record = Record.valueOf(recID);
1435 boolean hetatm = true;
1436 switch (record) {
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461 case ATOM:
1462 hetatm = false;
1463 case HETATM:
1464 String name = line.substring(12, 16).trim();
1465 if (name.toUpperCase().contains("1H") || name.toUpperCase().contains("2H")
1466 || name.toUpperCase().contains("3H")) {
1467
1468 fileStandard = VERSION3_2;
1469 }
1470 Character altLoc = line.substring(16, 17).toUpperCase().charAt(0);
1471 if (!altLoc.equals(' ') && !altLoc.equals(currentAltLoc)) {
1472 break;
1473 }
1474
1475
1476
1477 String resName = line.substring(17, 20).trim();
1478 Character chainID = line.substring(21, 22).charAt(0);
1479 String segID = getExistingSegID(chainID);
1480 int resSeq = Hybrid36.decode(4, line.substring(22, 26));
1481 double[] d = new double[3];
1482 d[0] = parseDouble(line.substring(30, 38).trim());
1483 d[1] = parseDouble(line.substring(38, 46).trim());
1484 d[2] = parseDouble(line.substring(46, 54).trim());
1485 double occupancy = 1.0;
1486 double tempFactor = 1.0;
1487 Atom newAtom = new Atom(0, name, altLoc, d, resName, resSeq, chainID, occupancy,
1488 tempFactor, segID);
1489 newAtom.setHetero(hetatm);
1490
1491 if (modRes.containsKey(resName.toUpperCase())) {
1492 newAtom.setModRes(true);
1493 }
1494
1495 Atom returnedAtom = activeMolecularAssembly.findAtom(newAtom);
1496 if (returnedAtom != null) {
1497 returnedAtom.setXYZ(d);
1498 double[] retXYZ = new double[3];
1499 returnedAtom.getXYZ(retXYZ);
1500 } else {
1501 String message = format(" Could not find atom %s in assembly", newAtom);
1502 if (dieOnMissingAtom) {
1503 logger.severe(message);
1504 } else {
1505 logger.warning(message);
1506 }
1507 }
1508 break;
1509 case CRYST1:
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524 logger.info(" Crystal record found.");
1525 if (line.length() < 55) {
1526 logger.severe(" CRYST1 record is improperly formatted.");
1527 }
1528 double aaxis = parseDouble(line.substring(6, 15).trim());
1529 double baxis = parseDouble(line.substring(15, 24).trim());
1530 double caxis = parseDouble(line.substring(24, 33).trim());
1531 double alpha = parseDouble(line.substring(33, 40).trim());
1532 double beta = parseDouble(line.substring(40, 47).trim());
1533 double gamma = parseDouble(line.substring(47, 54).trim());
1534 int limit = min(line.length(), 66);
1535 String sg = line.substring(55, limit).trim();
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551 Crystal crystal = activeMolecularAssembly.getCrystal();
1552 SpaceGroup spaceGroup = SpaceGroupDefinitions.spaceGroupFactory(sg);
1553 if (Objects.equals(crystal.spaceGroup.shortName, spaceGroup.shortName)) {
1554 crystal.changeUnitCellParameters(aaxis, baxis, caxis, alpha, beta, gamma);
1555 } else {
1556
1557 logger.warning(format(" Original space group %s could not be changed to %s",
1558 crystal.spaceGroup.shortName, spaceGroup.shortName));
1559 }
1560 break;
1561 case ENDMDL:
1562 case END:
1563
1564 logger.log(Level.FINE, format(" Model %d successfully read", modelsRead));
1565 modelDone = true;
1566 break;
1567 case REMARK:
1568 remarkLines.add(line.trim());
1569 if (line.contains("Lambda:")) {
1570 Matcher m = lambdaPattern.matcher(line);
1571 if (m.find()) {
1572 lastReadLambda = Double.parseDouble(m.group(1));
1573 }
1574 }
1575 break;
1576 default:
1577 break;
1578 }
1579 } catch (Exception ex) {
1580
1581 }
1582 if (modelDone) {
1583 break;
1584 }
1585 line = currentReader.readLine();
1586 }
1587 return true;
1588 } catch (IOException ex) {
1589 logger.info(
1590 format(" Exception in parsing frame %d of %s:" + " %s", modelsRead, system.toString(),
1591 ex));
1592 }
1593 }
1594 return false;
1595 }
1596
1597
1598
1599
1600
1601
1602
1603 public void setAltID(MolecularAssembly molecularAssembly, Character altLoc) {
1604 setMolecularSystem(molecularAssembly);
1605 currentAltLoc = altLoc;
1606 }
1607
1608
1609
1610
1611
1612
1613 public void setLogWrites(boolean logWrites) {
1614 this.logWrites = logWrites;
1615 }
1616
1617
1618
1619
1620
1621
1622 public void setModelNumbering(int modelsWritten) {
1623 this.modelsWritten = modelsWritten;
1624 }
1625
1626 public void setLMN(int[] lmn) {
1627 if(lmn[0] >= 1 && lmn[1] >= 1 && lmn[2] >= 1){
1628 this.lmn = lmn;
1629 }else{
1630
1631 this.lmn = new int[]{1,1,1};
1632 }
1633 }
1634
1635
1636
1637
1638
1639
1640 public void setSymOp(int symOp) {
1641 this.nSymOp = symOp;
1642 }
1643
1644
1645
1646
1647
1648
1649
1650 public boolean writeFileAsP1(File file) {
1651
1652 Crystal crystal = activeMolecularAssembly.getCrystal();
1653 int nSymOps = crystal.getUnitCell().spaceGroup.getNumberOfSymOps();
1654
1655 if (nSymOps == 1) {
1656
1657 if (!writeFile(file, false)) {
1658 logger.info(format(" Save failed for %s", activeMolecularAssembly));
1659 return false;
1660 } else {
1661 return true;
1662 }
1663 } else {
1664 Polymer[] polymers = activeMolecularAssembly.getChains();
1665 int chainCount = 0;
1666 for (Polymer polymer : polymers) {
1667 Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1668 polymer.setChainID(chainID);
1669 polymer.setSegID(chainID.toString());
1670 }
1671 nSymOp = 0;
1672 logWrites = false;
1673 boolean writeEnd = false;
1674 if (!writeFile(file, false, false, writeEnd)) {
1675 logger.info(format(" Save failed for %s", activeMolecularAssembly));
1676 return false;
1677 } else {
1678 for (int i = 1; i < nSymOps; i++) {
1679 nSymOp = i;
1680 for (Polymer polymer : polymers) {
1681 Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1682 polymer.setChainID(chainID);
1683 polymer.setSegID(chainID.toString());
1684 }
1685 writeEnd = i == nSymOps - 1;
1686 if (!writeFile(file, true, false, writeEnd)) {
1687 logger.info(format(" Save failed for %s", activeMolecularAssembly));
1688 return false;
1689 }
1690 }
1691 }
1692
1693
1694 chainCount = 0;
1695 for (Polymer polymer : polymers) {
1696 Character chainID = Polymer.CHAIN_IDS.charAt(chainCount++);
1697 polymer.setChainID(chainID);
1698 polymer.setSegID(chainID.toString());
1699 }
1700
1701 }
1702
1703 return true;
1704 }
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715 public boolean writeFile(File saveFile, boolean append, boolean printLinear, boolean writeEnd) {
1716 return writeFile(saveFile, append, Collections.emptySet(), writeEnd, true);
1717 }
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731 public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1732 boolean versioning) {
1733 return writeFile(saveFile, append, toExclude, writeEnd, versioning, null);
1734 }
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749 public boolean writeFile(File saveFile, boolean append, Set<Atom> toExclude, boolean writeEnd,
1750 boolean versioning, String[] extraLines) {
1751 if (standardizeAtomNames) {
1752 logger.info(" Setting atom names to PDB standard.");
1753 renameAtomsToPDBStandard(activeMolecularAssembly);
1754 }
1755 final Set<Atom> atomExclusions = toExclude == null ? Collections.emptySet() : toExclude;
1756 if (saveFile == null) {
1757 return false;
1758 }
1759 if (vdwH) {
1760 logger.info(" Saving hydrogen to van der Waals centers instead of nuclear locations.");
1761 }
1762 if (nSymOp > -1) {
1763 logger.info(format(" Saving atoms using the symmetry operator:\n%s\n",
1764 activeMolecularAssembly.getCrystal().getUnitCell().spaceGroup.getSymOp(nSymOp)
1765 .toString()));
1766 }
1767
1768
1769 StringBuilder sb = new StringBuilder("ATOM ");
1770 StringBuilder anisouSB = new StringBuilder("ANISOU");
1771 StringBuilder terSB = new StringBuilder("TER ");
1772 StringBuilder model = null;
1773 for (int i = 6; i < 80; i++) {
1774 sb.append(' ');
1775 anisouSB.append(' ');
1776 terSB.append(' ');
1777 }
1778
1779 File newFile = saveFile;
1780 if (!append) {
1781 if (versioning) {
1782 newFile = version(saveFile);
1783 }
1784 } else if (modelsWritten >= 0) {
1785 model = new StringBuilder(format("MODEL %-4d", ++modelsWritten));
1786 model.append(repeat(" ", 65));
1787 }
1788 activeMolecularAssembly.setFile(newFile);
1789 if (activeMolecularAssembly.getName() == null) {
1790 activeMolecularAssembly.setName(newFile.getName());
1791 }
1792 if (logWrites) {
1793 logger.log(Level.INFO, " Saving {0}", newFile.getName());
1794 }
1795
1796 try (FileWriter fw = new FileWriter(newFile, append);
1797 BufferedWriter bw = new BufferedWriter(fw)) {
1798
1799
1800
1801
1802 String[] headerLines = activeMolecularAssembly.getHeaderLines();
1803 for (String line : headerLines) {
1804 bw.write(format("%s\n", line));
1805 }
1806 if (extraLines != null) {
1807 if (rotamerTitration && extraLines[0].contains("REMARK")) {
1808 for (String line : extraLines) {
1809 bw.write(line + "\n");
1810 }
1811 } else {
1812 for (String line : extraLines) {
1813 bw.write(format("REMARK 999 %s\n", line));
1814 }
1815 }
1816 }
1817 if (model != null) {
1818 bw.write(model.toString());
1819 bw.newLine();
1820 }
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835 if (nSymOp < 0) {
1836
1837 Crystal crystal = activeMolecularAssembly.getCrystal();
1838 if (crystal != null && !crystal.aperiodic()) {
1839 Crystal c = crystal.getUnitCell();
1840 bw.write(c.toCRYST1());
1841 }
1842 } else if (nSymOp == 0) {
1843
1844 Crystal crystal = activeMolecularAssembly.getCrystal();
1845 if (crystal != null && !crystal.aperiodic()) {
1846 Crystal c = crystal.getUnitCell();
1847 Crystal p1 = new Crystal(c.a, c.b, c.c, c.alpha, c.beta, c.gamma, "P1");
1848 bw.write(p1.toCRYST1());
1849 }
1850 }
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875 int serNum = 1;
1876 Polymer[] polymers = activeMolecularAssembly.getChains();
1877 if (polymers != null) {
1878 for (Polymer polymer : polymers) {
1879 List<Residue> residues = polymer.getResidues();
1880 for (Residue residue : residues) {
1881 if (residue.getName().equalsIgnoreCase("CYS")) {
1882 List<Atom> cysAtoms = residue.getAtomList().stream()
1883 .filter(a -> !atomExclusions.contains(a)).toList();
1884 Atom SG1 = null;
1885 for (Atom atom : cysAtoms) {
1886 String atName = atom.getName().toUpperCase();
1887 if (atName.equals("SG") || atName.equals("SH")
1888 || atom.getAtomType().atomicNumber == 16) {
1889 SG1 = atom;
1890 break;
1891 }
1892 }
1893 List<Bond> bonds = SG1.getBonds();
1894 for (Bond bond : bonds) {
1895 Atom SG2 = bond.get1_2(SG1);
1896 if (SG2.getAtomType().atomicNumber == 16 && !atomExclusions.contains(SG2)) {
1897 if (SG1.getIndex() < SG2.getIndex()) {
1898 bond.energy(false);
1899 bw.write(format("SSBOND %3d CYS %1s %4s CYS %1s %4s %36s %5.2f\n", serNum++,
1900 SG1.getChainID().toString(), Hybrid36.encode(4, SG1.getResidueNumber()),
1901 SG2.getChainID().toString(), Hybrid36.encode(4, SG2.getResidueNumber()), "",
1902 bond.getValue()));
1903 }
1904 }
1905 }
1906 }
1907 }
1908 }
1909 }
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931 MolecularAssembly[] molecularAssemblies = this.getMolecularAssemblyArray();
1932 int serial = 1;
1933 if (nSymOp > 0) {
1934 serial = serialP1;
1935 }
1936
1937
1938 if (polymers != null) {
1939 for (Polymer polymer : polymers) {
1940 currentSegID = polymer.getName();
1941 currentChainID = polymer.getChainID();
1942 sb.setCharAt(21, currentChainID);
1943
1944 List<Residue> residues = polymer.getResidues();
1945 for (Residue residue : residues) {
1946 String resName = residue.getName();
1947 if (resName.length() > 3) {
1948 resName = resName.substring(0, 3);
1949 }
1950 int resID = residue.getResidueNumber();
1951 sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
1952 sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
1953
1954 List<Atom> residueAtoms = residue.getAtomList().stream()
1955 .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
1956 boolean altLocFound = false;
1957 for (Atom atom : residueAtoms) {
1958 writeAtom(atom, serial++, sb, anisouSB, bw);
1959 Character altLoc = atom.getAltLoc();
1960 if (altLoc != null && !altLoc.equals(' ')) {
1961 altLocFound = true;
1962 }
1963 }
1964
1965 if (altLocFound) {
1966 for (int ma = 1; ma < molecularAssemblies.length; ma++) {
1967 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
1968 Polymer altPolymer = altMolecularAssembly.getPolymer(currentChainID, currentSegID, false);
1969 Residue altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
1970 if (altResidue == null) {
1971 resName = AminoAcid3.UNK.name();
1972 altResidue = altPolymer.getResidue(resName, resID, false, Residue.ResidueType.AA);
1973 }
1974 residueAtoms = altResidue.getAtomList().stream()
1975 .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
1976 for (Atom atom : residueAtoms) {
1977 if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
1978 .equals('A')) {
1979 sb.replace(17, 20, padLeft(atom.getResidueName().toUpperCase(), 3));
1980 writeAtom(atom, serial++, sb, anisouSB, bw);
1981 }
1982 }
1983 }
1984 }
1985 }
1986 terSB.replace(6, 11, format("%5s", Hybrid36.encode(5, serial++)));
1987 terSB.replace(12, 16, " ");
1988 terSB.replace(16, 26, sb.substring(16, 26));
1989 bw.write(terSB.toString());
1990 bw.newLine();
1991 }
1992 }
1993 sb.replace(0, 6, "HETATM");
1994 sb.setCharAt(21, 'A');
1995
1996 Character chainID = 'A';
1997 if (polymers != null) {
1998 chainID = polymers[0].getChainID();
1999 }
2000 activeMolecularAssembly.setChainIDAndRenumberMolecules(chainID);
2001
2002
2003 List<MSNode> molecules = activeMolecularAssembly.getMolecules();
2004 int numMolecules = molecules.size();
2005 for (int i = 0; i < numMolecules; i++) {
2006 Molecule molecule = (Molecule) molecules.get(i);
2007 chainID = molecule.getChainID();
2008 sb.setCharAt(21, chainID);
2009 String resName = molecule.getResidueName();
2010 int resID = molecule.getResidueNumber();
2011 if (resName.length() > 3) {
2012 resName = resName.substring(0, 3);
2013 }
2014 sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2015 sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2016
2017 List<Atom> moleculeAtoms = molecule.getAtomList().stream()
2018 .filter(a -> !atomExclusions.contains(a)).collect(Collectors.toList());
2019 boolean altLocFound = false;
2020 for (Atom atom : moleculeAtoms) {
2021 writeAtom(atom, serial++, sb, anisouSB, bw);
2022 Character altLoc = atom.getAltLoc();
2023 if (altLoc != null && !altLoc.equals(' ')) {
2024 altLocFound = true;
2025 }
2026 }
2027
2028 if (altLocFound) {
2029 for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2030 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2031 MSNode altmolecule = altMolecularAssembly.getMolecules().get(i);
2032 moleculeAtoms = altmolecule.getAtomList();
2033 for (Atom atom : moleculeAtoms) {
2034 if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2035 .equals('A')) {
2036 writeAtom(atom, serial++, sb, anisouSB, bw);
2037 }
2038 }
2039 }
2040 }
2041 }
2042
2043 List<MSNode> ions = activeMolecularAssembly.getIons();
2044 for (int i = 0; i < ions.size(); i++) {
2045 Molecule ion = (Molecule) ions.get(i);
2046 chainID = ion.getChainID();
2047 sb.setCharAt(21, chainID);
2048 String resName = ion.getResidueName();
2049 int resID = ion.getResidueNumber();
2050 if (resName.length() > 3) {
2051 resName = resName.substring(0, 3);
2052 }
2053 sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2054 sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2055
2056 List<Atom> ionAtoms = ion.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2057 .collect(Collectors.toList());
2058 boolean altLocFound = false;
2059 for (Atom atom : ionAtoms) {
2060 writeAtom(atom, serial++, sb, anisouSB, bw);
2061 Character altLoc = atom.getAltLoc();
2062 if (altLoc != null && !altLoc.equals(' ')) {
2063 altLocFound = true;
2064 }
2065 }
2066
2067 if (altLocFound) {
2068 for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2069 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2070 MSNode altion = altMolecularAssembly.getIons().get(i);
2071 ionAtoms = altion.getAtomList();
2072 for (Atom atom : ionAtoms) {
2073 if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2074 .equals('A')) {
2075 writeAtom(atom, serial++, sb, anisouSB, bw);
2076 }
2077 }
2078 }
2079 }
2080 }
2081
2082 List<MSNode> water = activeMolecularAssembly.getWater();
2083 for (int i = 0; i < water.size(); i++) {
2084 Molecule wat = (Molecule) water.get(i);
2085 chainID = wat.getChainID();
2086 sb.setCharAt(21, chainID);
2087 String resName = wat.getResidueName();
2088 int resID = wat.getResidueNumber();
2089 if (resName.length() > 3) {
2090 resName = resName.substring(0, 3);
2091 }
2092 sb.replace(17, 20, padLeft(resName.toUpperCase(), 3));
2093 sb.replace(22, 26, format("%4s", Hybrid36.encode(4, resID)));
2094 List<Atom> waterAtoms = wat.getAtomList().stream().filter(a -> !atomExclusions.contains(a))
2095 .collect(Collectors.toList());
2096 boolean altLocFound = false;
2097 for (Atom atom : waterAtoms) {
2098 writeAtom(atom, serial++, sb, anisouSB, bw);
2099 Character altLoc = atom.getAltLoc();
2100 if (altLoc != null && !altLoc.equals(' ')) {
2101 altLocFound = true;
2102 }
2103 }
2104
2105 if (altLocFound) {
2106 for (int ma = 1; ma < molecularAssemblies.length; ma++) {
2107 MolecularAssembly altMolecularAssembly = molecularAssemblies[ma];
2108 MSNode altwater = altMolecularAssembly.getWater().get(i);
2109 waterAtoms = altwater.getAtomList();
2110 for (Atom atom : waterAtoms) {
2111 if (atom.getAltLoc() != null && !atom.getAltLoc().equals(' ') && !atom.getAltLoc()
2112 .equals('A')) {
2113 writeAtom(atom, serial++, sb, anisouSB, bw);
2114 }
2115 }
2116 }
2117 }
2118 }
2119
2120 if (model != null) {
2121 bw.write("ENDMDL");
2122 bw.newLine();
2123 }
2124
2125 if (writeEnd) {
2126 bw.write("END");
2127 bw.newLine();
2128 }
2129
2130 if (nSymOp >= 0) {
2131 serialP1 = serial;
2132 }
2133
2134 } catch (Exception e) {
2135 String message = "Exception writing to file: " + saveFile;
2136 logger.log(Level.WARNING, message, e);
2137 return false;
2138 }
2139 return true;
2140 }
2141
2142
2143
2144
2145
2146
2147 @Override
2148 public boolean writeFile(File saveFile, boolean append) {
2149 return writeFile(saveFile, append, false, true);
2150 }
2151
2152 public boolean writeFile(File saveFile, boolean append, String[] extraLines) {
2153 return writeFile(saveFile, append, Collections.emptySet(), false, !append, extraLines);
2154 }
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166 public boolean writeFile(File saveFile, boolean append, boolean versioning) {
2167 return writeFile(saveFile, append, Collections.emptySet(), true, versioning);
2168 }
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178 public boolean writeFileWithHeader(File saveFile, String header, boolean append) {
2179 if (standardizeAtomNames) {
2180 logger.info(" Setting atom names to PDB standard.");
2181 renameAtomsToPDBStandard(activeMolecularAssembly);
2182 }
2183 activeMolecularAssembly.setFile(saveFile);
2184 activeMolecularAssembly.setName(saveFile.getName());
2185
2186 try (FileWriter fw = new FileWriter(saveFile, append); BufferedWriter bw = new BufferedWriter(
2187 fw)) {
2188 bw.write(header);
2189 bw.newLine();
2190 } catch (Exception e) {
2191 String message = " Exception writing to file: " + saveFile;
2192 logger.log(Level.WARNING, message, e);
2193 return false;
2194 }
2195 if (writeFile(saveFile, true)) {
2196 logger.log(Level.INFO, " Wrote PDB to file {0}", saveFile.getPath());
2197 return true;
2198 } else {
2199 logger.log(Level.INFO, " Error writing to file {0}", saveFile.getPath());
2200 return false;
2201 }
2202 }
2203
2204
2205
2206
2207
2208
2209
2210
2211 public boolean writeFileWithHeader(File saveFile, String header) {
2212 return writeFileWithHeader(saveFile, header, true);
2213 }
2214
2215
2216
2217
2218
2219
2220
2221
2222 public boolean writeFileWithHeader(File saveFile, StringBuilder header) {
2223 return writeFileWithHeader(saveFile, header.toString());
2224 }
2225
2226
2227
2228
2229
2230
2231
2232 private String getExistingSegID(Character c) {
2233 if (c.equals(' ')) {
2234 c = 'A';
2235 }
2236
2237 List<String> segIDs = segidMap.get(c);
2238 if (segIDs != null) {
2239 String segID = segIDs.get(0);
2240 if (segIDs.size() > 1) {
2241 logger.log(Level.INFO, format(" Multiple SegIDs for to chain %s; using %s.", c, segID));
2242 }
2243 return segID;
2244 } else {
2245 logger.log(Level.INFO, format(" Creating SegID for to chain %s", c));
2246 return getSegID(c);
2247 }
2248 }
2249
2250
2251
2252
2253
2254
2255
2256 private String getSegID(Character c) {
2257 if (c.equals(' ')) {
2258 c = 'A';
2259 }
2260
2261
2262 if (c.equals(currentChainID)) {
2263 return currentSegID;
2264 }
2265
2266
2267 int count = 0;
2268 for (String segID : segIDs) {
2269 if (segID.endsWith(c.toString())) {
2270 count++;
2271 }
2272 }
2273
2274
2275 String newSegID;
2276 if (count == 0) {
2277 newSegID = c.toString();
2278 } else {
2279 newSegID = count + c.toString();
2280 }
2281 segIDs.add(newSegID);
2282 currentChainID = c;
2283 currentSegID = newSegID;
2284
2285 if (segidMap.containsKey(c)) {
2286 segidMap.get(c).add(newSegID);
2287 } else {
2288 List<String> newChainList = new ArrayList<>();
2289 newChainList.add(newSegID);
2290 segidMap.put(c, newChainList);
2291 }
2292
2293 return newSegID;
2294 }
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306 private void writeAtom(Atom atom, int serial, StringBuilder sb, StringBuilder anisouSB,
2307 BufferedWriter bw) throws IOException {
2308 String name = atom.getName();
2309 if (name.length() > 4) {
2310 name = name.substring(0, 4);
2311 } else if (name.length() == 1) {
2312 name = name + " ";
2313 } else if (name.length() == 2) {
2314 if (atom.getAtomType().valence == 0) {
2315 name = name + " ";
2316 } else {
2317 name = name + " ";
2318 }
2319 }
2320 double[] xyz = vdwH ? atom.getRedXYZ() : atom.getXYZ(null);
2321 if (nSymOp >= 0) {
2322 Crystal crystal = activeMolecularAssembly.getCrystal().getUnitCell();
2323 SymOp symOp = crystal.spaceGroup.getSymOp(nSymOp);
2324 double[] newXYZ = new double[xyz.length];
2325 crystal.applySymOp(xyz, newXYZ, symOp);
2326 xyz = newXYZ;
2327 }
2328 sb.replace(6, 16, format("%5s " + padLeft(name.toUpperCase(), 4), Hybrid36.encode(5, serial)));
2329 Character altLoc = atom.getAltLoc();
2330 sb.setCharAt(16, Objects.requireNonNullElse(altLoc, ' '));
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345 StringBuilder decimals = new StringBuilder();
2346 for (int i = 0; i < 3; i++) {
2347 try {
2348 decimals.append(StringUtils.fwFpDec(xyz[i], 8, 3));
2349 } catch (IllegalArgumentException ex) {
2350 String newValue = StringUtils.fwFpTrunc(xyz[i], 8, 3);
2351 logger.info(format(" XYZ %d coordinate %8.3f for atom %s "
2352 + "overflowed bounds of 8.3f string specified by PDB "
2353 + "format; truncating value to %s", i, xyz[i], atom, newValue));
2354 decimals.append(newValue);
2355 }
2356 }
2357 try {
2358 decimals.append(StringUtils.fwFpDec(atom.getOccupancy(), 6, 2));
2359 } catch (IllegalArgumentException ex) {
2360 logger.severe(
2361 format(" Occupancy %f for atom %s is impossible; " + "value must be between 0 and 1",
2362 atom.getOccupancy(), atom));
2363 }
2364 try {
2365 decimals.append(StringUtils.fwFpDec(atom.getTempFactor(), 6, 2));
2366 } catch (IllegalArgumentException ex) {
2367 String newValue = StringUtils.fwFpTrunc(atom.getTempFactor(), 6, 2);
2368 logger.info(format(" Atom temp factor %6.2f for atom %s overflowed "
2369 + "bounds of 6.2f string specified by PDB format; truncating " + "value to %s",
2370 atom.getTempFactor(), atom, newValue));
2371 decimals.append(newValue);
2372 }
2373 sb.replace(30, 66, decimals.toString());
2374
2375 name = Atom.ElementSymbol.values()[atom.getAtomicNumber() - 1].toString();
2376 name = name.toUpperCase();
2377 if (atom.isDeuterium()) {
2378 name = "D";
2379 }
2380 sb.replace(76, 78, padLeft(name, 2));
2381 sb.replace(78, 80, format("%2d", 0));
2382 bw.write(sb.toString());
2383 bw.newLine();
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402 double[] anisou = atom.getAnisou(null);
2403 if (anisou != null) {
2404 anisouSB.replace(6, 80, sb.substring(6, 80));
2405 anisouSB.replace(28, 70,
2406 format("%7d%7d%7d%7d%7d%7d", (int) (anisou[0] * 1e4), (int) (anisou[1] * 1e4),
2407 (int) (anisou[2] * 1e4), (int) (anisou[3] * 1e4), (int) (anisou[4] * 1e4),
2408 (int) (anisou[5] * 1e4)));
2409 bw.write(anisouSB.toString());
2410 bw.newLine();
2411 }
2412 }
2413
2414
2415 private enum Record {
2416 ANISOU, ATOM, CONECT, CRYST1, DBREF, END, MODEL, ENDMDL, HELIX, HETATM, LINK, MTRIX1, MTRIX2, MTRIX3, MODRES, SEQRES, SHEET, SSBOND, REMARK
2417 }
2418
2419
2420 public enum PDBFileStandard {
2421 VERSION2_3, VERSION3_0, VERSION3_1, VERSION3_2, VERSION3_3
2422 }
2423
2424 public static class Mutation {
2425
2426
2427 final int resID;
2428
2429 final String resName;
2430
2431 final char chainChar;
2432
2433 public Mutation(int resID, char chainChar, String newResName) {
2434 newResName = newResName.toUpperCase();
2435 if (newResName.length() != 3) {
2436 logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2437 }
2438 try {
2439 AminoAcidUtils.AminoAcid3.valueOf(newResName);
2440 } catch (IllegalArgumentException ex) {
2441 logger.log(Level.WARNING, format("Invalid mutation target: %s.", newResName));
2442 }
2443 this.resID = resID;
2444 this.chainChar = chainChar;
2445 this.resName = newResName;
2446 }
2447 }
2448 }