View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.utilities;
39  
import static ffx.utilities.TinkerUtils.parseTinkerAtomList;
import static java.lang.Integer.parseInt;
import static java.lang.Integer.parseUnsignedInt;
import static java.lang.String.format;
import static java.util.Arrays.asList;
import static org.apache.commons.math3.util.FastMath.max;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.math3.util.FastMath;
72  
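/**
 * Static string utilities: fixed-width number formatting, creation of GZIP text readers and
 * writers, parsing and writing of atom index ranges, recognition of common ion and water names,
 * and construction of RCSB download URLs.
 *
 * @author Michael Schnieders
 * @since 1.0
 */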
79  public class StringUtils {
80  
/** Standardized water name returned by {@code tryParseWater}: {@code "HOH"}. */
public static final String STANDARD_WATER_NAME = "HOH";

private static final Logger logger = Logger.getLogger(StringUtils.class.getName());
/** Upper-case names that are treated as water. */
private static final Set<String> waterNames = Set.of("HOH", "DOD", "WAT", "TIP", "TIP3", "TIP4", "MOL");
/** Unmodifiable map from every accepted (upper-case) ion spelling to its standard name. */
private static final Map<String, String> ionNames;
/** Matches an inclusive integer range written as {@code start-end}. */
private static final Pattern intRangePattern = Pattern.compile("(\\d+)-(\\d+)");

static {
  // Row i of "elements" is combined with every charge suffix in row i of "suffixes".
  // TODO: Finalize treatment of transition metals like Mn and Zn which may occur in other
  // oxidation states.
  String[][] elements = {
    // Monovalent cations.
    {"NA", "K", "LI", "RB", "CS", "FR", "AG", "AU"},
    // Divalent cations.
    {"BE", "MG", "CA", "SR", "BA", "RA", "MN", "ZN"},
    // Monovalent anions.
    {"F", "CL", "BR", "I", "AT"}
  };
  String[][] suffixes = {
    {"", "+", "1", "1+", "+1"},
    {"", "+", "2", "2+", "+2", "++"},
    {"", "-", "1", "1-", "-1"}
  };

  Map<String, String> aliases = new HashMap<>();
  for (int group = 0; group < elements.length; group++) {
    for (String element : elements[group]) {
      for (String suffix : suffixes[group]) {
        // Every spelling maps back to the bare element symbol.
        aliases.put(element + suffix, element);
      }
    }
  }
  ionNames = Collections.unmodifiableMap(aliases);
}

/**
 * Not instantiable: this class only exposes static members.
 */
private StringUtils() {
  // Intentionally empty.
}
131 
132   /**
133    * cifForID
134    *
135    * @param id a {@link java.lang.String} object.
136    * @return a {@link java.lang.String} object.
137    */
138   public static String cifForID(String id) {
139     if (id.length() != 4) {
140       return null;
141     }
142     return "http://www.rcsb.org/pdb/files/" + id.toLowerCase() + ".cif";
143   }
144 
145   /**
146    * Finds consecutive subranges in an array of ints, and returns their mins and maxes. This can
147    * include singletons.
148    *
149    * <p>Example: [4, 5, 6, 1, 1, 2, 5, 6, 7] would become [4,6],[1,1],[1,2],[5,7]
150    *
151    * @param set Array of ints to split into consecutive subranges.
152    * @return Consecutive subrange mins, maxes
153    */
154   public static List<int[]> consecutiveInts(int[] set) {
155     if (set == null || set.length == 0) {
156       return Collections.emptyList();
157     }
158     List<int[]> allRanges = new ArrayList<>();
159 
160     int rangeStart = set[0];
161     int rangeEnd = rangeStart;
162     for (int i = 1; i < set.length; i++) {
163       if (set[i] == rangeEnd + 1) {
164         rangeEnd = set[i];
165       } else {
166         allRanges.add(new int[] {rangeStart, rangeEnd});
167         rangeStart = set[i];
168         rangeEnd = rangeStart;
169       }
170     }
171     allRanges.add(new int[] {rangeStart, rangeEnd});
172     return allRanges;
173   }
174 
175   /**
176    * Creates a reader from a Gzip file to text.
177    *
178    * @param file Gzip file to read from.
179    * @return A Reader.
180    * @throws java.io.IOException Thrown if creation of the GZip Reader fails.
181    */
182   public static Reader createGzipReader(File file) throws IOException {
183     return createGzipReader(file, Charset.defaultCharset());
184   }
185 
186   /**
187    * Creates a reader from a Gzip file to text.
188    *
189    * @param file Gzip file to read from.
190    * @param cs Character set to use.
191    * @return A Reader.
192    * @throws java.io.IOException Thrown if creation of the GZip Reader fails.
193    */
194   public static Reader createGzipReader(File file, Charset cs) throws IOException {
195     /*
196      * The BufferedReader buffers the input requests, reading a large chunk at a time and caching it.
197      * The InputStreamReader converts the input bytes to characters.
198      * The GZIPInputStream decompresses incoming input bytes from GZIP to raw bytes.
199      * The FileInputStream reads raw bytes from a (gzipped) file.
200      */
201     return new BufferedReader(
202         new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), cs));
203   }
204 
205   /**
206    * Creates a writer for text to a Gzip file.
207    *
208    * @param file Gzip file to write to.
209    * @return A Writer
210    * @throws java.io.IOException Thrown if creation of the GZip Writer fails.
211    */
212   public static Writer createGzipWriter(File file) throws IOException {
213     return createGzipWriter(file, Charset.defaultCharset());
214   }
215 
216   /**
217    * Creates a writer for text to a Gzip file.
218    *
219    * @param file Gzip file to write to.
220    * @param cs Character set to use.
221    * @return A Writer
222    * @throws java.io.IOException Thrown if creation of the GZip Writer fails.
223    */
224   public static Writer createGzipWriter(File file, Charset cs) throws IOException {
225     /*
226      * The BufferedWriter buffers the input.
227      * The OutputStreamWriter converts the input to bytes.
228      * The GZIPOutputStream compresses the bytes.
229      * The FileOutputStream writes bytes to a file.
230      */
231     return new BufferedWriter(
232         new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)), cs));
233   }
234 
235   /**
236    * Prints a fixed-width decimal, similar to String.format(%width.precf, val), but ensuring the
237    * resulting string is never longer than width. If the result ends in a period (such as 14.), the
238    * method leaves off the decimal. An exception is thrown if the value cannot be formatted in the
239    * specified width.
240    *
241    * @param val Value to print
242    * @param width Width of field
243    * @param prec Number of decimal places
244    * @return Formatted string
245    * @throws java.lang.IllegalArgumentException if any.
246    */
247   public static String fwDec(double val, int width, int prec) throws IllegalArgumentException {
248     if (width < 1 || prec < 0) {
249       throw new IllegalArgumentException(" Must have width >= 1 and precision >= 0");
250     }
251     int w1 = width - 1;
252     double maxVal = FastMath.pow(10.0, width);
253     double minVal = maxVal / -10.0;
254 
255     if (val >= maxVal) {
256       throw new IllegalArgumentException(
257           String.format(
258               " Value %f exceeded the maximum of %f enforced by width %d", val, maxVal, width));
259     } else if (val <= minVal) {
260       throw new IllegalArgumentException(
261           String.format(
262               " Value %f is less than the minimum of %f enforced by width %d", val, minVal, width));
263     }
264 
265     String str = String.format("%" + width + "." + prec + "f", val);
266     if (str.charAt(w1) == '.') {
267       return " " + str.substring(0, w1);
268     } else {
269       return str.substring(0, width);
270     }
271   }
272 
273   /**
274    * Prints a fixed-width decimal using <code>String.format</code> conventions, throwing an error if
275    * the value cannot be formatted within that space.
276    *
277    * @param val the value to print.
278    * @param width the width of the field.
279    * @param prec the number of decimal places.
280    * @return a {@link java.lang.String} object.
281    * @throws java.lang.IllegalArgumentException If the length of String is greater than the
282    *     width.
283    */
284   public static String fwFpDec(double val, int width, int prec) throws IllegalArgumentException {
285     String str = String.format("%" + width + "." + prec + "f", val);
286     if (str.length() > width) {
287       throw new IllegalArgumentException(
288           String.format(" Value %f cannot fit in width %d with precision %d", val, width, prec));
289     } else {
290       return str;
291     }
292   }
293 
294   /**
295    * Prints a fixed-width decimal using <code>String.format</code> conventions, reducing the value if
296    * necessary to fit within the width.
297    *
298    * @param val The value to print.
299    * @param width The width of the field.
300    * @param prec The number of decimal places.
301    * @return a {@link java.lang.String} object.
302    */
303   public static String fwFpTrunc(double val, int width, int prec) {
304     String str = String.format("%" + width + "." + prec + "f", val);
305     if (str.length() > width) {
306       StringBuilder sb;
307       if (val < 0) {
308         sb = new StringBuilder("-");
309       } else {
310         sb = new StringBuilder("9");
311       }
312 
313       sb.append(org.apache.commons.lang3.StringUtils.repeat("9", max(0, (width - prec - 2))));
314       sb.append(".");
315       sb.append(org.apache.commons.lang3.StringUtils.repeat("9", max(0, prec)));
316       str = sb.toString();
317     }
318     return str;
319   }
320 
321   /**
322    * Returns a Map from recognized ion names to standard ion names.
323    *
324    * @return Map from ion names to standardized ion names.
325    */
326   public static Map<String, String> getIonNames() {
327     return new HashMap<>(ionNames);
328   }
329 
330   /**
331    * Returns a List of recognized water names (defensive copy).
332    *
333    * @return List of water names.
334    */
335   public static List<String> getWaterNames() {
336     return new ArrayList<>(waterNames);
337   }
338 
339   /**
340    * Checks if a String matches a known monoatomic ion name.
341    *
342    * @param name String to check.
343    * @return If it is the name of a monoatomic ion.
344    */
345   public static boolean looksLikeIon(String name) {
346     return ionNames.containsKey(name.toUpperCase());
347   }
348 
349   /**
350    * Checks if a String matches a known water name.
351    *
352    * @param name String to check.
353    * @return If it is a water name.
354    */
355   public static boolean looksLikeWater(String name) {
356     return waterNames.contains(name.toUpperCase());
357   }
358 
359   /**
360    * padLeft
361    *
362    * @param s a {@link java.lang.String} object.
363    * @param n The number of spaces to pad.
364    * @return a {@link java.lang.String} object.
365    */
366   public static String padLeft(String s, int n) {
367     return String.format("%" + n + "s", s);
368   }
369 
370   /**
371    * padRight
372    *
373    * @param s a {@link java.lang.String} object.
374    * @param n The number of spaces to pad.
375    * @return a {@link java.lang.String} object.
376    */
377   public static String padRight(String s, int n) {
378     return String.format("%-" + n + "s", s);
379   }
380 
381   /**
382    * Parses a numerical argument for an atom-specific flag.
383    *
384    * <p>Parses, checks validity, and then returns the appropriate range.
385    *
386    * <p>Input should be 1-indexed (user end), output 0-indexed.
387    *
388    * @param keyType Type of key
389    * @param atomRange Input string
390    * @param nAtoms Number of atoms in the MolecularAssembly
391    * @return A List of selected atoms.
392    * @throws java.lang.IllegalArgumentException if an invalid argument
393    */
394   public static List<Integer> parseAtomRange(String keyType, String atomRange, int nAtoms)
395       throws IllegalArgumentException {
396     Matcher m = intRangePattern.matcher(atomRange);
397     if (m.matches()) {
398       int start = parseInt(m.group(1)) - 1;
399       int end = parseInt(m.group(2)) - 1;
400       if (start > end) {
401         throw new IllegalArgumentException(format(" %s input %s not valid: start > end.", keyType, atomRange));
402       } else if (start < 0) {
403         throw new IllegalArgumentException(
404             format(" %s input %s not valid: atoms should be indexed starting from 1.", keyType, atomRange));
405       } else if (start >= nAtoms) {
406         throw new IllegalArgumentException(
407             format(" %s input %s not valid: atom range is out of bounds for assembly of length %d.",
408                 keyType, atomRange, nAtoms));
409       } else {
410         if (end >= nAtoms) {
411           logger.log(
412               Level.INFO,
413               format(" Truncating range %s to end of valid range %d.", atomRange, nAtoms));
414           end = nAtoms - 1;
415         }
416         List<Integer> selectedAtoms = new ArrayList<>();
417         for (int i = start; i <= end; i++) {
418           selectedAtoms.add(i);
419         }
420         return selectedAtoms;
421       }
422     } else {
423       try {
424         int atNum = parseUnsignedInt(atomRange) - 1;
425         if (atNum < 0 || atNum >= nAtoms) {
426           throw new IllegalArgumentException(
427               format(
428                   " %s numerical argument %s out-of-bounds for range 1 to %d",
429                   keyType, atomRange, nAtoms));
430         }
431         List<Integer> selectedAtoms = new ArrayList<>();
432         selectedAtoms.add(atNum);
433         return selectedAtoms;
434       } catch (NumberFormatException ex) {
435         // Try to parse as a Tinker style range.
436         List<String> tokens = asList(atomRange.split("\\s+"));
437         return parseTinkerAtomList(tokens, -1, -1);
438       }
439     }
440   }
441 
442   /**
443    * Parses a list of atom ranges for a per atom flag.
444    *
445    * <p>Parses, checks validity, and then returns a list with the index of selected atoms.
446    *
447    * <p>Input should be 1-indexed (user end) and the output 0-indexed.
448    *
449    * @param keyType Type of key
450    * @param atomRanges Input string
451    * @param nAtoms Number of atoms in the MolecularAssembly
452    * @return A List of selected atoms.
453    * @throws java.lang.IllegalArgumentException if an invalid argument
454    */
455   public static List<Integer> parseAtomRanges(String keyType, String atomRanges, int nAtoms)
456       throws IllegalArgumentException {
457     List<Integer> atomList = new ArrayList<>();
458     // Replace "n" and "N" with the number of atoms.
459     String n = Integer.toString(nAtoms);
460     atomRanges = atomRanges.toUpperCase().replace("N", n);
461     // Split on periods (.), commas (,) or semicolons(;).
462     // IntelliJ suggests replacing "\\.|,|;" with [.,;]
463     String[] ranges =
464         Arrays.stream(atomRanges.split("\\.|,|;")).map(String::trim).toArray(String[]::new);
465 
466     for (String range : ranges) {
467       List<Integer> list = parseAtomRange(keyType, range, nAtoms);
468       // Avoid adding duplicates.
469       for (int i : list) {
470         if (!atomList.contains(i)) {
471           atomList.add(i);
472         }
473       }
474     }
475     return atomList;
476   }
477 
478   /**
479    * pdbForID
480    *
481    * @param id a {@link java.lang.String} object.
482    * @return a {@link java.lang.String} object.
483    */
484   public static String pdbForID(String id) {
485     if (id.length() != 4) {
486       return null;
487     }
488     return "http://www.rcsb.org/pdb/files/" + id.toLowerCase() + ".pdb";
489   }
490 
491   /**
492    * Checks if a String looks like a known ion. Returns either its standardized name, or null if it
493    * doesn't look like an ion.
494    *
495    * @param name String to check.
496    * @return Standard ion name (matches) or null (no match).
497    */
498   public static String tryParseIon(String name) {
499     return ionNames.getOrDefault(name.toUpperCase(), null);
500   }
501 
502   /**
503    * Checks if a String looks like a water molecule. Returns either a standardized water name, or
504    * null if it doesn't look like water.
505    *
506    * @param name String to check.
507    * @return Standard water name (matches) or null (no match).
508    */
509   public static String tryParseWater(String name) {
510     return waterNames.contains(name.toUpperCase()) ? STANDARD_WATER_NAME : null;
511   }
512 
513   /**
514    * Write atoms ranges for a list of atom indices.
515    * @param atoms Atoms indices for which a comma separated list is desired.
516    * @return String of the atom list (comma separated, hyphens for large ranges).
517    */
518   public static String writeAtomRanges(int[] atoms){
519     Arrays.sort(atoms);
520     int nAtoms = atoms.length;
521     StringBuilder output = new StringBuilder();
522     for(int i = 0; i < nAtoms; i++){
523       int index = 0;
524       int current = atoms[i] + 1;
525       output.append(current);
526       // Determine if multiple entries are in a row (replace with "-").
527       while(i + index + 1 < nAtoms && atoms[i + index + 1] + 1 == current + index + 1){
528         index++;
529       }
530       if(index >= 2){
531         output.append("-").append(atoms[i + index] + 1);
532       }else if(index == 1){
533         output.append(",").append(atoms[i + index] + 1);
534       }
535       i += index;
536       if(i + 1 < nAtoms){
537         output.append(",");
538       }
539     }
540     String string = output.toString();
541     if(string.endsWith(",")){
542       return string.substring(0,string.length() - 1);
543     }else {
544       return output.toString();
545     }
546   }
547 }