View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.xray.parsers;
39  
40  import ffx.crystal.Crystal;
41  import ffx.crystal.HKL;
42  import ffx.crystal.ReflectionList;
43  import ffx.crystal.Resolution;
44  import ffx.crystal.SpaceGroupInfo;
45  import ffx.xray.DiffractionRefinementData;
46  import org.apache.commons.configuration2.CompositeConfiguration;
47  
48  import java.io.BufferedReader;
49  import java.io.File;
50  import java.io.FileReader;
51  import java.io.IOException;
52  import java.util.logging.Level;
53  import java.util.logging.Logger;
54  
55  import static ffx.crystal.SpaceGroupDefinitions.spaceGroupNumber;
56  import static ffx.crystal.SpaceGroupInfo.pdb2ShortName;
57  import static java.lang.Double.parseDouble;
58  import static java.lang.Integer.parseInt;
59  import static java.lang.String.format;
60  import static org.apache.commons.math3.util.FastMath.min;
61  
62  /**
63   * CIF file reader
64   *
65   * @author Timothy D. Fenn
66   * @since 1.0
67   */
68  public class CIFFilter implements DiffractionFileFilter {
69  
70    private static final Logger logger = Logger.getLogger(CIFFilter.class.getName());
71    private final double[] cell = {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0};
72    private double resHigh = -1.0;
73    private String spacegroupName = null;
74    private int spacegroupNum = -1;
75    private int h = -1, k = -1, l = -1;
76    private int fo = -1, sigFo = -1;
77    private int io = -1, sigIo = -1;
78    private int rFree = -1;
79    private int nAll, nObs;
80  
81    /** Constructor. */
82    public CIFFilter() {
83    }
84  
85    /** {@inheritDoc} */
86    @Override
87    public ReflectionList getReflectionList(File cifFile) {
88      return getReflectionList(cifFile, null);
89    }
90  
91    /** {@inheritDoc} */
92    @Override
93    public ReflectionList getReflectionList(File cifFile, CompositeConfiguration properties) {
94      try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
95        String string;
96        while ((string = br.readLine()) != null) {
97  
98          // Reached reflections, break.
99          if (string.startsWith("_refln.")) {
100           break;
101         }
102         String[] strArray = string.split("\\s+");
103         if (strArray[0].startsWith("_reflns")
104             || strArray[0].startsWith("_cell")
105             || strArray[0].startsWith("_symmetry")) {
106           String[] cifArray = strArray[0].split("\\.+");
107           switch (CIFHeader.toHeader(cifArray[1])) {
108             case d_resolution_high:
109               resHigh = parseDouble(strArray[1]);
110               break;
111             case number_all:
112               nAll = parseInt(strArray[1]);
113               break;
114             case number_obs:
115               nObs = parseInt(strArray[1]);
116               break;
117             case length_a:
118               cell[0] = parseDouble(strArray[1]);
119               break;
120             case length_b:
121               cell[1] = parseDouble(strArray[1]);
122               break;
123             case length_c:
124               cell[2] = parseDouble(strArray[1]);
125               break;
126             case angle_alpha:
127               cell[3] = parseDouble(strArray[1]);
128               break;
129             case angle_beta:
130               cell[4] = parseDouble(strArray[1]);
131               break;
132             case angle_gamma:
133               cell[5] = parseDouble(strArray[1]);
134               break;
135             case Int_Tables_number:
136               spacegroupNum = parseInt(strArray[1]);
137               break;
138             case space_group_name_H_M:
139               String[] spacegroupNameArray = string.split("'+");
140               if (spacegroupNameArray.length > 1) {
141                 spacegroupName = spacegroupNameArray[1];
142               } else if (strArray.length > 1) {
143                 spacegroupName = strArray[1];
144               }
145               break;
146           }
147         }
148       }
149     } catch (IOException e) {
150       String message = " CIF IO Exception.";
151       logger.log(Level.WARNING, message, e);
152       return null;
153     }
154 
155     if (spacegroupNum < 0 && spacegroupName != null) {
156       spacegroupNum = spaceGroupNumber(pdb2ShortName(spacegroupName));
157     }
158 
159     if (spacegroupNum < 0 || resHigh < 0 || cell[0] < 0) {
160       logger.info(
161           " The CIF header contains insufficient information to generate the reflection list.");
162       return null;
163     }
164 
165     if (logger.isLoggable(Level.INFO)) {
166       StringBuilder sb = new StringBuilder();
167       sb.append(format("\nOpening %s\n", cifFile.getName()));
168       sb.append(" Setting up Reflection List based on CIF:\n");
169       sb.append(format("  spacegroup #: %d (name: %s)\n",
170           spacegroupNum, SpaceGroupInfo.spaceGroupNames[spacegroupNum - 1]));
171       sb.append(format("  Resolution: %8.3f\n", 0.999999 * resHigh));
172       sb.append(format("  Cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n",
173           cell[0], cell[1], cell[2], cell[3], cell[4], cell[5]));
174       sb.append(format("\n  CIF # HKL (observed): %d\n", nObs));
175       sb.append(format("  CIF # HKL (all):      %d\n", nAll));
176       logger.info(sb.toString());
177     }
178 
179     logger.info(format(" Space group number %d", spacegroupNum));
180     Crystal crystal = new Crystal(cell[0], cell[1], cell[2], cell[3], cell[4], cell[5],
181         spacegroupNum);
182 
183     double sampling = 1.0 / 1.5;
184     if (properties != null) {
185       sampling = properties.getDouble("sampling", 1.0 / 1.5);
186     }
187     Resolution resolution = new Resolution(0.999999 * resHigh, sampling);
188     return new ReflectionList(crystal, resolution, properties);
189   }
190 
191   /** {@inheritDoc} */
192   @Override
193   public double getResolution(File cifFile, Crystal crystal) {
194     double resolution = Double.POSITIVE_INFINITY;
195     try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
196       String string;
197       int nCol = 0;
198       boolean inHKL = false;
199       while ((string = br.readLine()) != null) {
200         String[] strArray = string.split("\\s+");
201         if (strArray[0].startsWith("_refln.")) {
202           inHKL = true;
203           br.mark(0);
204           String[] cifArray = strArray[0].split("\\.+");
205           switch (CIFHeader.toHeader(cifArray[1])) {
206             case index_h:
207               h = nCol;
208               break;
209             case index_k:
210               k = nCol;
211               break;
212             case index_l:
213               l = nCol;
214               break;
215           }
216           nCol++;
217         } else if (inHKL) {
218           if (h < 0 || k < 0 || l < 0) {
219             String message = " Fatal error in CIF file - no H K L indexes?\n";
220             logger.log(Level.SEVERE, message);
221             return -1.0;
222           }
223           break;
224         }
225       }
226 
227       // Go back to where the reflections start.
228       br.reset();
229       HKL hkl = new HKL();
230       while ((string = br.readLine()) != null) {
231 
232         // Reached end, break.
233         if (string.trim().startsWith("#END")) {
234           break;
235         } else if (string.trim().startsWith("data")) {
236           break;
237         } else if (string.trim().startsWith("#")) {
238           continue;
239         }
240 
241         // Some files split data on to multiple lines.
242         String[] strArray = string.trim().split("\\s+");
243         while (strArray.length < nCol) {
244           string = string + " " + br.readLine();
245           strArray = string.trim().split("\\s+");
246         }
247 
248         int ih = parseInt(strArray[h]);
249         int ik = parseInt(strArray[k]);
250         int il = parseInt(strArray[l]);
251 
252         hkl.setH(ih);
253         hkl.setK(ik);
254         hkl.setL(il);
255         resolution = min(resolution, crystal.res(hkl));
256       }
257     } catch (IOException e) {
258       String message = " CIF IO Exception.";
259       logger.log(Level.WARNING, message, e);
260       return -1.0;
261     }
262     return resolution;
263   }
264 
265   /** {@inheritDoc} */
266   @Override
267   public boolean readFile(
268       File cifFile,
269       ReflectionList reflectionList,
270       DiffractionRefinementData refinementData,
271       CompositeConfiguration properties) {
272 
273     int nRead, nNAN, nRes;
274     int nIgnore, nCIFIgnore;
275     int nFriedel, nCut;
276     boolean transpose = false;
277     boolean intensitiesToAmplitudes = false;
278 
279     StringBuilder sb = new StringBuilder();
280     sb.append(format(" Opening %s\n", cifFile.getName()));
281     if (refinementData.rFreeFlag < 0) {
282       refinementData.setFreeRFlag(1);
283       sb.append(format(" Setting R free flag to CIF default: %d\n", refinementData.rFreeFlag));
284     }
285 
286     try {
287       BufferedReader br = new BufferedReader(new FileReader(cifFile));
288 
289       String string;
290       int nCol = 0;
291       boolean inHKL = false;
292       while ((string = br.readLine()) != null) {
293         String[] stringArray = string.split("\\s+");
294         if (stringArray[0].startsWith("_refln.")) {
295           inHKL = true;
296           br.mark(0);
297           String[] cifArray = stringArray[0].split("\\.+");
298           switch (CIFHeader.toHeader(cifArray[1])) {
299             case index_h:
300               h = nCol;
301               break;
302             case index_k:
303               k = nCol;
304               break;
305             case index_l:
306               l = nCol;
307               break;
308             case F_meas:
309             case F_meas_au:
310               fo = nCol;
311               break;
312             case F_meas_sigma:
313             case F_meas_sigma_au:
314               sigFo = nCol;
315               break;
316             case intensity_meas:
317               io = nCol;
318               break;
319             case intensity_sigma:
320               sigIo = nCol;
321               break;
322             case status:
323               rFree = nCol;
324               break;
325           }
326           nCol++;
327         } else if (inHKL) {
328           if (h < 0 || k < 0 || l < 0) {
329             String message = " Fatal error in CIF file - no H K L indexes?\n";
330             logger.log(Level.SEVERE, message);
331             return false;
332           }
333           break;
334         }
335       }
336 
337       if (fo < 0 && sigFo < 0 && io > 0 && sigIo > 0) {
338         intensitiesToAmplitudes = true;
339       }
340 
341       if (fo < 0 && io < 0) {
342         logger.severe(" Reflection data (I/F) not found in CIF file!");
343       }
344 
345       // Go back to where the reflections start.
346       br.reset();
347 
348       // Check if HKLs need to be transposed or not.
349       HKL mate = new HKL();
350       int nPosIgnore = 0;
351       int nTransIgnore = 0;
352       while ((string = br.readLine()) != null) {
353 
354         if (string.trim().startsWith("#END")) {
355           // Reached end, break.
356           break;
357         } else if (string.trim().startsWith("data")) {
358           break;
359         } else if (string.trim().startsWith("#")) {
360           continue;
361         }
362 
363         // Some files split data on to multiple lines.
364         String[] strArray = string.trim().split("\\s+");
365         while (strArray.length < nCol) {
366           string = string + " " + br.readLine();
367           strArray = string.trim().split("\\s+");
368         }
369 
370         if (rFree > 0) {
371           // Ignored cases.
372           if (strArray[rFree].charAt(0) == 'x'
373               || strArray[rFree].charAt(0) == '<'
374               || strArray[rFree].charAt(0) == '-'
375               || strArray[rFree].charAt(0) == 'h'
376               || strArray[rFree].charAt(0) == 'l') {
377             continue;
378           }
379         }
380 
381         int ih = parseInt(strArray[h]);
382         int ik = parseInt(strArray[k]);
383         int il = parseInt(strArray[l]);
384 
385         boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, false);
386         HKL hklPos = reflectionList.getHKL(mate);
387         if (hklPos == null) {
388           nPosIgnore++;
389         }
390 
391         friedel = reflectionList.findSymHKL(ih, ik, il, mate, true);
392         HKL hklTrans = reflectionList.getHKL(mate);
393         if (hklTrans == null) {
394           nTransIgnore++;
395         }
396       }
397       if (nPosIgnore > nTransIgnore) {
398         transpose = true;
399       }
400 
401       // Re-open to start at beginning.
402       br = new BufferedReader(new FileReader(cifFile));
403       inHKL = false;
404       while ((string = br.readLine()) != null) {
405         String[] strArray = string.split("\\s+");
406         if (strArray[0].startsWith("_refln.")) {
407           br.mark(0);
408           inHKL = true;
409         } else if (inHKL) {
410           break;
411         }
412       }
413 
414       // Go back to where the reflections start.
415       br.reset();
416 
417       // Read in data.
418       double[][] anofSigF = new double[refinementData.n][4];
419       for (int i = 0; i < refinementData.n; i++) {
420         anofSigF[i][0] = anofSigF[i][1] = anofSigF[i][2] = anofSigF[i][3] = Double.NaN;
421       }
422       nRead = nNAN = nRes = nIgnore = nCIFIgnore = nFriedel = nCut = 0;
423       while ((string = br.readLine()) != null) {
424 
425         // Reached end, break.
426         if (string.trim().startsWith("#END")) {
427           break;
428         } else if (string.trim().startsWith("data")) {
429           break;
430         } else if (string.trim().startsWith("#")) {
431           continue;
432         }
433 
434         // Some files split data on to multiple lines.
435         String[] strArray = string.trim().split("\\s+");
436         while (strArray.length < nCol) {
437           string = string + " " + br.readLine();
438           strArray = string.trim().split("\\s+");
439         }
440 
441         int ih = parseInt(strArray[h]);
442         int ik = parseInt(strArray[k]);
443         int il = parseInt(strArray[l]);
444 
445         boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, transpose);
446         HKL hkl = reflectionList.getHKL(mate);
447         if (hkl != null) {
448           boolean isnull = false;
449           if (rFree > 0) {
450             if (strArray[rFree].charAt(0) == 'o') {
451               refinementData.setFreeR(hkl.getIndex(), 0);
452             } else if (strArray[rFree].charAt(0) == 'f') {
453               refinementData.setFreeR(hkl.getIndex(), 1);
454             } else if (strArray[rFree].charAt(0) == 'x') {
455               isnull = true;
456               nNAN++;
457             } else if (strArray[rFree].charAt(0) == '<'
458                 || strArray[rFree].charAt(0) == '-'
459                 || strArray[rFree].charAt(0) == 'h'
460                 || strArray[rFree].charAt(0) == 'l') {
461               isnull = true;
462               nCIFIgnore++;
463             } else {
464               refinementData.setFreeR(hkl.getIndex(), parseInt(strArray[rFree]));
465             }
466           }
467 
468           if (!intensitiesToAmplitudes && !isnull) {
469             if (strArray[fo].charAt(0) == '?' || strArray[sigFo].charAt(0) == '?') {
470               nNAN++;
471               continue;
472             }
473 
474             if (refinementData.fSigFCutoff > 0.0) {
475               double f1 = parseDouble(strArray[fo]);
476               double sigf1 = parseDouble(strArray[sigFo]);
477               if ((f1 / sigf1) < refinementData.fSigFCutoff) {
478                 nCut++;
479                 continue;
480               }
481             }
482 
483             if (friedel) {
484               anofSigF[hkl.getIndex()][2] = parseDouble(strArray[fo]);
485               anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigFo]);
486               nFriedel++;
487             } else {
488               anofSigF[hkl.getIndex()][0] = parseDouble(strArray[fo]);
489               anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigFo]);
490             }
491           }
492 
493           if (intensitiesToAmplitudes && !isnull) {
494             if (strArray[io].charAt(0) == '?' || strArray[sigIo].charAt(0) == '?') {
495               nNAN++;
496               continue;
497             }
498 
499             if (friedel) {
500               anofSigF[hkl.getIndex()][2] = parseDouble(strArray[io]);
501               anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigIo]);
502               nFriedel++;
503             } else {
504               anofSigF[hkl.getIndex()][0] = parseDouble(strArray[io]);
505               anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigIo]);
506             }
507           }
508 
509           nRead++;
510         } else {
511           HKL tmp = new HKL(ih, ik, il);
512           if (!reflectionList.resolution.inInverseResSqRange(
513               reflectionList.crystal.invressq(tmp))) {
514             nRes++;
515           } else {
516             nIgnore++;
517           }
518         }
519       }
520       br.close();
521 
522       // Set up fsigf from F+ and F-.
523       refinementData.generateFsigFfromAnomalousFsigF(anofSigF);
524       if (intensitiesToAmplitudes) {
525         refinementData.intensitiesToAmplitudes();
526       }
527     } catch (IOException ioe) {
528       System.out.println(" IO Exception: " + ioe.getMessage());
529       return false;
530     }
531 
532     sb.append(format(" HKL data is %s\n", transpose ? "transposed" : "not transposed"));
533     sb.append(format(" HKL read in:                             %d\n", nRead));
534     sb.append(format(" HKL read as Friedel mates:               %d\n", nFriedel));
535     sb.append(format(" HKL with NaN (ignored):                  %d\n", nNAN));
536     sb.append(format(" HKL NOT read in (status <, -, h or l):   %d\n", nCIFIgnore));
537     sb.append(format(" HKL NOT read in (too high resolution):   %d\n", nRes));
538     sb.append(format(" HKL NOT read in (not in internal list?): %d\n", nIgnore));
539     sb.append(format(" HKL NOT read in (F/sigF cutoff):         %d\n", nCut));
540     sb.append(
541         format(" HKL in internal list:                    %d\n", reflectionList.hklList.size()));
542 
543     if (logger.isLoggable(Level.INFO)) {
544       logger.info(sb.toString());
545     }
546 
547     boolean doRFree = properties.getBoolean("generate-rfree", false);
548     if (doRFree) {
549       logger.info(" Generating R free flags");
550       refinementData.generateRFree();
551     }
552     return true;
553   }
554 
555   private enum CIFHeader {
556     d_resolution_high,
557     number_all,
558     number_obs,
559     length_a,
560     length_b,
561     length_c,
562     angle_alpha,
563     angle_beta,
564     angle_gamma,
565     Int_Tables_number,
566     space_group_name_H_M,
567     crystal_id,
568     wavelength_id,
569     scale_group_code,
570     status,
571     index_h,
572     index_k,
573     index_l,
574     F_meas,
575     F_meas_au,
576     F_meas_sigma,
577     F_meas_sigma_au,
578     intensity_meas,
579     intensity_sigma,
580     NOVALUE;
581 
582     public static CIFHeader toHeader(String str) {
583       try {
584         return valueOf(str.replace('-', '_'));
585       } catch (Exception ex) {
586         return NOVALUE;
587       }
588     }
589   }
590 }