View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.xray.parsers;
39  
40  import static ffx.crystal.SpaceGroupDefinitions.spaceGroupNumber;
41  import static ffx.crystal.SpaceGroupInfo.pdb2ShortName;
42  import static java.lang.Double.parseDouble;
43  import static java.lang.Integer.parseInt;
44  import static java.lang.String.format;
45  import static org.apache.commons.math3.util.FastMath.min;
46  
47  import ffx.crystal.Crystal;
48  import ffx.crystal.HKL;
49  import ffx.crystal.ReflectionList;
50  import ffx.crystal.Resolution;
51  import ffx.crystal.SpaceGroupInfo;
52  import ffx.xray.DiffractionRefinementData;
53  import java.io.BufferedReader;
54  import java.io.File;
55  import java.io.FileReader;
56  import java.io.IOException;
57  import java.util.logging.Level;
58  import java.util.logging.Logger;
59  import org.apache.commons.configuration2.CompositeConfiguration;
60  
61  /**
62   * CIF file reader
63   *
64   * @author Timothy D. Fenn
65   * @since 1.0
66   */
67  public class CIFFilter implements DiffractionFileFilter {
68  
69    private static final Logger logger = Logger.getLogger(CIFFilter.class.getName());
70    private final double[] cell = {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0};
71    private double resHigh = -1.0;
72    private String spacegroupName = null;
73    private int spacegroupNum = -1;
74    private int h = -1, k = -1, l = -1;
75    private int fo = -1, sigFo = -1;
76    private int io = -1, sigIo = -1;
77    private int rFree = -1;
78    private int nAll, nObs;
79  
80    /** Constructor. */
81    public CIFFilter() {
82    }
83  
84    /** {@inheritDoc} */
85    @Override
86    public ReflectionList getReflectionList(File cifFile) {
87      return getReflectionList(cifFile, null);
88    }
89  
90    /** {@inheritDoc} */
91    @Override
92    public ReflectionList getReflectionList(File cifFile, CompositeConfiguration properties) {
93      try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
94        String string;
95        while ((string = br.readLine()) != null) {
96  
97          // Reached reflections, break.
98          if (string.startsWith("_refln.")) {
99            break;
100         }
101         String[] strArray = string.split("\\s+");
102         if (strArray[0].startsWith("_reflns")
103             || strArray[0].startsWith("_cell")
104             || strArray[0].startsWith("_symmetry")) {
105           String[] cifArray = strArray[0].split("\\.+");
106           switch (CIFHeader.toHeader(cifArray[1])) {
107             case d_resolution_high:
108               resHigh = parseDouble(strArray[1]);
109               break;
110             case number_all:
111               nAll = parseInt(strArray[1]);
112               break;
113             case number_obs:
114               nObs = parseInt(strArray[1]);
115               break;
116             case length_a:
117               cell[0] = parseDouble(strArray[1]);
118               break;
119             case length_b:
120               cell[1] = parseDouble(strArray[1]);
121               break;
122             case length_c:
123               cell[2] = parseDouble(strArray[1]);
124               break;
125             case angle_alpha:
126               cell[3] = parseDouble(strArray[1]);
127               break;
128             case angle_beta:
129               cell[4] = parseDouble(strArray[1]);
130               break;
131             case angle_gamma:
132               cell[5] = parseDouble(strArray[1]);
133               break;
134             case Int_Tables_number:
135               spacegroupNum = parseInt(strArray[1]);
136               break;
137             case space_group_name_H_M:
138               String[] spacegroupNameArray = string.split("'+");
139               if (spacegroupNameArray.length > 1) {
140                 spacegroupName = spacegroupNameArray[1];
141               } else if (strArray.length > 1) {
142                 spacegroupName = strArray[1];
143               }
144               break;
145           }
146         }
147       }
148     } catch (IOException e) {
149       String message = " CIF IO Exception.";
150       logger.log(Level.WARNING, message, e);
151       return null;
152     }
153 
154     if (spacegroupNum < 0 && spacegroupName != null) {
155       spacegroupNum = spaceGroupNumber(pdb2ShortName(spacegroupName));
156     }
157 
158     if (spacegroupNum < 0 || resHigh < 0 || cell[0] < 0) {
159       logger.info(
160           " The CIF header contains insufficient information to generate the reflection list.");
161       return null;
162     }
163 
164     if (logger.isLoggable(Level.INFO)) {
165       StringBuilder sb = new StringBuilder();
166       sb.append(format("\nOpening %s\n", cifFile.getName()));
167       sb.append(" Setting up Reflection List based on CIF:\n");
168       sb.append(format("  spacegroup #: %d (name: %s)\n",
169           spacegroupNum, SpaceGroupInfo.spaceGroupNames[spacegroupNum - 1]));
170       sb.append(format("  Resolution: %8.3f\n", 0.999999 * resHigh));
171       sb.append(format("  Cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n",
172           cell[0], cell[1], cell[2], cell[3], cell[4], cell[5]));
173       sb.append(format("\n  CIF # HKL (observed): %d\n", nObs));
174       sb.append(format("  CIF # HKL (all):      %d\n", nAll));
175       logger.info(sb.toString());
176     }
177 
178     logger.info(format(" Space group number %d", spacegroupNum));
179     Crystal crystal = new Crystal(cell[0], cell[1], cell[2], cell[3], cell[4], cell[5],
180         spacegroupNum);
181 
182     double sampling = 1.0 / 1.5;
183     if (properties != null) {
184       sampling = properties.getDouble("sampling", 1.0 / 1.5);
185     }
186     Resolution resolution = new Resolution(0.999999 * resHigh, sampling);
187     return new ReflectionList(crystal, resolution, properties);
188   }
189 
190   /** {@inheritDoc} */
191   @Override
192   public double getResolution(File cifFile, Crystal crystal) {
193     double resolution = Double.POSITIVE_INFINITY;
194     try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
195       String string;
196       int nCol = 0;
197       boolean inHKL = false;
198       while ((string = br.readLine()) != null) {
199         String[] strArray = string.split("\\s+");
200         if (strArray[0].startsWith("_refln.")) {
201           inHKL = true;
202           br.mark(0);
203           String[] cifArray = strArray[0].split("\\.+");
204           switch (CIFHeader.toHeader(cifArray[1])) {
205             case index_h:
206               h = nCol;
207               break;
208             case index_k:
209               k = nCol;
210               break;
211             case index_l:
212               l = nCol;
213               break;
214           }
215           nCol++;
216         } else if (inHKL) {
217           if (h < 0 || k < 0 || l < 0) {
218             String message = " Fatal error in CIF file - no H K L indexes?\n";
219             logger.log(Level.SEVERE, message);
220             return -1.0;
221           }
222           break;
223         }
224       }
225 
226       // Go back to where the reflections start.
227       br.reset();
228       HKL hkl = new HKL();
229       while ((string = br.readLine()) != null) {
230 
231         // Reached end, break.
232         if (string.trim().startsWith("#END")) {
233           break;
234         } else if (string.trim().startsWith("data")) {
235           break;
236         } else if (string.trim().startsWith("#")) {
237           continue;
238         }
239 
240         // Some files split data on to multiple lines.
241         String[] strArray = string.trim().split("\\s+");
242         while (strArray.length < nCol) {
243           string = string + " " + br.readLine();
244           strArray = string.trim().split("\\s+");
245         }
246 
247         int ih = parseInt(strArray[h]);
248         int ik = parseInt(strArray[k]);
249         int il = parseInt(strArray[l]);
250 
251         hkl.setH(ih);
252         hkl.setK(ik);
253         hkl.setL(il);
254         resolution = min(resolution, crystal.res(hkl));
255       }
256     } catch (IOException e) {
257       String message = " CIF IO Exception.";
258       logger.log(Level.WARNING, message, e);
259       return -1.0;
260     }
261     return resolution;
262   }
263 
264   /** {@inheritDoc} */
265   @Override
266   public boolean readFile(
267       File cifFile,
268       ReflectionList reflectionList,
269       DiffractionRefinementData refinementData,
270       CompositeConfiguration properties) {
271 
272     int nRead, nNAN, nRes;
273     int nIgnore, nCIFIgnore;
274     int nFriedel, nCut;
275     boolean transpose = false;
276     boolean intensitiesToAmplitudes = false;
277 
278     StringBuilder sb = new StringBuilder();
279     sb.append(format(" Opening %s\n", cifFile.getName()));
280     if (refinementData.rFreeFlag < 0) {
281       refinementData.setFreeRFlag(1);
282       sb.append(format(" Setting R free flag to CIF default: %d\n", refinementData.rFreeFlag));
283     }
284 
285     try {
286       BufferedReader br = new BufferedReader(new FileReader(cifFile));
287 
288       String string;
289       int nCol = 0;
290       boolean inHKL = false;
291       while ((string = br.readLine()) != null) {
292         String[] stringArray = string.split("\\s+");
293         if (stringArray[0].startsWith("_refln.")) {
294           inHKL = true;
295           br.mark(0);
296           String[] cifArray = stringArray[0].split("\\.+");
297           switch (CIFHeader.toHeader(cifArray[1])) {
298             case index_h:
299               h = nCol;
300               break;
301             case index_k:
302               k = nCol;
303               break;
304             case index_l:
305               l = nCol;
306               break;
307             case F_meas:
308             case F_meas_au:
309               fo = nCol;
310               break;
311             case F_meas_sigma:
312             case F_meas_sigma_au:
313               sigFo = nCol;
314               break;
315             case intensity_meas:
316               io = nCol;
317               break;
318             case intensity_sigma:
319               sigIo = nCol;
320               break;
321             case status:
322               rFree = nCol;
323               break;
324           }
325           nCol++;
326         } else if (inHKL) {
327           if (h < 0 || k < 0 || l < 0) {
328             String message = " Fatal error in CIF file - no H K L indexes?\n";
329             logger.log(Level.SEVERE, message);
330             return false;
331           }
332           break;
333         }
334       }
335 
336       if (fo < 0 && sigFo < 0 && io > 0 && sigIo > 0) {
337         intensitiesToAmplitudes = true;
338       }
339 
340       if (fo < 0 && io < 0) {
341         logger.severe(" Reflection data (I/F) not found in CIF file!");
342       }
343 
344       // Go back to where the reflections start.
345       br.reset();
346 
347       // Check if HKLs need to be transposed or not.
348       HKL mate = new HKL();
349       int nPosIgnore = 0;
350       int nTransIgnore = 0;
351       while ((string = br.readLine()) != null) {
352 
353         if (string.trim().startsWith("#END")) {
354           // Reached end, break.
355           break;
356         } else if (string.trim().startsWith("data")) {
357           break;
358         } else if (string.trim().startsWith("#")) {
359           continue;
360         }
361 
362         // Some files split data on to multiple lines.
363         String[] strArray = string.trim().split("\\s+");
364         while (strArray.length < nCol) {
365           string = string + " " + br.readLine();
366           strArray = string.trim().split("\\s+");
367         }
368 
369         if (rFree > 0) {
370           // Ignored cases.
371           if (strArray[rFree].charAt(0) == 'x'
372               || strArray[rFree].charAt(0) == '<'
373               || strArray[rFree].charAt(0) == '-'
374               || strArray[rFree].charAt(0) == 'h'
375               || strArray[rFree].charAt(0) == 'l') {
376             continue;
377           }
378         }
379 
380         int ih = parseInt(strArray[h]);
381         int ik = parseInt(strArray[k]);
382         int il = parseInt(strArray[l]);
383 
384         boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, false);
385         HKL hklPos = reflectionList.getHKL(mate);
386         if (hklPos == null) {
387           nPosIgnore++;
388         }
389 
390         friedel = reflectionList.findSymHKL(ih, ik, il, mate, true);
391         HKL hklTrans = reflectionList.getHKL(mate);
392         if (hklTrans == null) {
393           nTransIgnore++;
394         }
395       }
396       if (nPosIgnore > nTransIgnore) {
397         transpose = true;
398       }
399 
400       // Re-open to start at beginning.
401       br = new BufferedReader(new FileReader(cifFile));
402       inHKL = false;
403       while ((string = br.readLine()) != null) {
404         String[] strArray = string.split("\\s+");
405         if (strArray[0].startsWith("_refln.")) {
406           br.mark(0);
407           inHKL = true;
408         } else if (inHKL) {
409           break;
410         }
411       }
412 
413       // Go back to where the reflections start.
414       br.reset();
415 
416       // Read in data.
417       double[][] anofSigF = new double[refinementData.n][4];
418       for (int i = 0; i < refinementData.n; i++) {
419         anofSigF[i][0] = anofSigF[i][1] = anofSigF[i][2] = anofSigF[i][3] = Double.NaN;
420       }
421       nRead = nNAN = nRes = nIgnore = nCIFIgnore = nFriedel = nCut = 0;
422       while ((string = br.readLine()) != null) {
423 
424         // Reached end, break.
425         if (string.trim().startsWith("#END")) {
426           break;
427         } else if (string.trim().startsWith("data")) {
428           break;
429         } else if (string.trim().startsWith("#")) {
430           continue;
431         }
432 
433         // Some files split data on to multiple lines.
434         String[] strArray = string.trim().split("\\s+");
435         while (strArray.length < nCol) {
436           string = string + " " + br.readLine();
437           strArray = string.trim().split("\\s+");
438         }
439 
440         int ih = parseInt(strArray[h]);
441         int ik = parseInt(strArray[k]);
442         int il = parseInt(strArray[l]);
443 
444         boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, transpose);
445         HKL hkl = reflectionList.getHKL(mate);
446         if (hkl != null) {
447           boolean isnull = false;
448           if (rFree > 0) {
449             if (strArray[rFree].charAt(0) == 'o') {
450               refinementData.setFreeR(hkl.getIndex(), 0);
451             } else if (strArray[rFree].charAt(0) == 'f') {
452               refinementData.setFreeR(hkl.getIndex(), 1);
453             } else if (strArray[rFree].charAt(0) == 'x') {
454               isnull = true;
455               nNAN++;
456             } else if (strArray[rFree].charAt(0) == '<'
457                 || strArray[rFree].charAt(0) == '-'
458                 || strArray[rFree].charAt(0) == 'h'
459                 || strArray[rFree].charAt(0) == 'l') {
460               isnull = true;
461               nCIFIgnore++;
462             } else {
463               refinementData.setFreeR(hkl.getIndex(), parseInt(strArray[rFree]));
464             }
465           }
466 
467           if (!intensitiesToAmplitudes && !isnull) {
468             if (strArray[fo].charAt(0) == '?' || strArray[sigFo].charAt(0) == '?') {
469               nNAN++;
470               continue;
471             }
472 
473             if (refinementData.fSigFCutoff > 0.0) {
474               double f1 = parseDouble(strArray[fo]);
475               double sigf1 = parseDouble(strArray[sigFo]);
476               if ((f1 / sigf1) < refinementData.fSigFCutoff) {
477                 nCut++;
478                 continue;
479               }
480             }
481 
482             if (friedel) {
483               anofSigF[hkl.getIndex()][2] = parseDouble(strArray[fo]);
484               anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigFo]);
485               nFriedel++;
486             } else {
487               anofSigF[hkl.getIndex()][0] = parseDouble(strArray[fo]);
488               anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigFo]);
489             }
490           }
491 
492           if (intensitiesToAmplitudes && !isnull) {
493             if (strArray[io].charAt(0) == '?' || strArray[sigIo].charAt(0) == '?') {
494               nNAN++;
495               continue;
496             }
497 
498             if (friedel) {
499               anofSigF[hkl.getIndex()][2] = parseDouble(strArray[io]);
500               anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigIo]);
501               nFriedel++;
502             } else {
503               anofSigF[hkl.getIndex()][0] = parseDouble(strArray[io]);
504               anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigIo]);
505             }
506           }
507 
508           nRead++;
509         } else {
510           HKL tmp = new HKL(ih, ik, il);
511           if (!reflectionList.resolution.inInverseResSqRange(
512               reflectionList.crystal.invressq(tmp))) {
513             nRes++;
514           } else {
515             nIgnore++;
516           }
517         }
518       }
519       br.close();
520 
521       // Set up fsigf from F+ and F-.
522       refinementData.generateFsigFfromAnomalousFsigF(anofSigF);
523       if (intensitiesToAmplitudes) {
524         refinementData.intensitiesToAmplitudes();
525       }
526     } catch (IOException ioe) {
527       System.out.println(" IO Exception: " + ioe.getMessage());
528       return false;
529     }
530 
531     sb.append(format(" HKL data is %s\n", transpose ? "transposed" : "not transposed"));
532     sb.append(format(" HKL read in:                             %d\n", nRead));
533     sb.append(format(" HKL read as Friedel mates:               %d\n", nFriedel));
534     sb.append(format(" HKL with NaN (ignored):                  %d\n", nNAN));
535     sb.append(format(" HKL NOT read in (status <, -, h or l):   %d\n", nCIFIgnore));
536     sb.append(format(" HKL NOT read in (too high resolution):   %d\n", nRes));
537     sb.append(format(" HKL NOT read in (not in internal list?): %d\n", nIgnore));
538     sb.append(format(" HKL NOT read in (F/sigF cutoff):         %d\n", nCut));
539     sb.append(
540         format(" HKL in internal list:                    %d\n", reflectionList.hklList.size()));
541 
542     if (logger.isLoggable(Level.INFO)) {
543       logger.info(sb.toString());
544     }
545 
546     boolean doRFree = properties.getBoolean("generate-rfree", false);
547     if (doRFree) {
548       logger.info(" Generating R free flags");
549       refinementData.generateRFree();
550     }
551     return true;
552   }
553 
554   private enum CIFHeader {
555     d_resolution_high,
556     number_all,
557     number_obs,
558     length_a,
559     length_b,
560     length_c,
561     angle_alpha,
562     angle_beta,
563     angle_gamma,
564     Int_Tables_number,
565     space_group_name_H_M,
566     crystal_id,
567     wavelength_id,
568     scale_group_code,
569     status,
570     index_h,
571     index_k,
572     index_l,
573     F_meas,
574     F_meas_au,
575     F_meas_sigma,
576     F_meas_sigma_au,
577     intensity_meas,
578     intensity_sigma,
579     NOVALUE;
580 
581     public static CIFHeader toHeader(String str) {
582       try {
583         return valueOf(str.replace('-', '_'));
584       } catch (Exception ex) {
585         return NOVALUE;
586       }
587     }
588   }
589 }