1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.xray.parsers;
39
40 import static ffx.crystal.SpaceGroupDefinitions.spaceGroupNumber;
41 import static ffx.crystal.SpaceGroupInfo.pdb2ShortName;
42 import static java.lang.Double.parseDouble;
43 import static java.lang.Integer.parseInt;
44 import static java.lang.String.format;
45 import static org.apache.commons.math3.util.FastMath.min;
46
47 import ffx.crystal.Crystal;
48 import ffx.crystal.HKL;
49 import ffx.crystal.ReflectionList;
50 import ffx.crystal.Resolution;
51 import ffx.crystal.SpaceGroupInfo;
52 import ffx.xray.DiffractionRefinementData;
53 import java.io.BufferedReader;
54 import java.io.File;
55 import java.io.FileReader;
56 import java.io.IOException;
57 import java.util.logging.Level;
58 import java.util.logging.Logger;
59 import org.apache.commons.configuration2.CompositeConfiguration;
60
61
62
63
64
65
66
67 public class CIFFilter implements DiffractionFileFilter {
68
69 private static final Logger logger = Logger.getLogger(CIFFilter.class.getName());
70 private final double[] cell = {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0};
71 private double resHigh = -1.0;
72 private String spacegroupName = null;
73 private int spacegroupNum = -1;
74 private int h = -1, k = -1, l = -1;
75 private int fo = -1, sigFo = -1;
76 private int io = -1, sigIo = -1;
77 private int rFree = -1;
78 private int nAll, nObs;
79
80
81 public CIFFilter() {
82 }
83
84
85 @Override
86 public ReflectionList getReflectionList(File cifFile) {
87 return getReflectionList(cifFile, null);
88 }
89
90
91 @Override
92 public ReflectionList getReflectionList(File cifFile, CompositeConfiguration properties) {
93 try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
94 String string;
95 while ((string = br.readLine()) != null) {
96
97
98 if (string.startsWith("_refln.")) {
99 break;
100 }
101 String[] strArray = string.split("\\s+");
102 if (strArray[0].startsWith("_reflns")
103 || strArray[0].startsWith("_cell")
104 || strArray[0].startsWith("_symmetry")) {
105 String[] cifArray = strArray[0].split("\\.+");
106 switch (CIFHeader.toHeader(cifArray[1])) {
107 case d_resolution_high:
108 resHigh = parseDouble(strArray[1]);
109 break;
110 case number_all:
111 nAll = parseInt(strArray[1]);
112 break;
113 case number_obs:
114 nObs = parseInt(strArray[1]);
115 break;
116 case length_a:
117 cell[0] = parseDouble(strArray[1]);
118 break;
119 case length_b:
120 cell[1] = parseDouble(strArray[1]);
121 break;
122 case length_c:
123 cell[2] = parseDouble(strArray[1]);
124 break;
125 case angle_alpha:
126 cell[3] = parseDouble(strArray[1]);
127 break;
128 case angle_beta:
129 cell[4] = parseDouble(strArray[1]);
130 break;
131 case angle_gamma:
132 cell[5] = parseDouble(strArray[1]);
133 break;
134 case Int_Tables_number:
135 spacegroupNum = parseInt(strArray[1]);
136 break;
137 case space_group_name_H_M:
138 String[] spacegroupNameArray = string.split("'+");
139 if (spacegroupNameArray.length > 1) {
140 spacegroupName = spacegroupNameArray[1];
141 } else if (strArray.length > 1) {
142 spacegroupName = strArray[1];
143 }
144 break;
145 }
146 }
147 }
148 } catch (IOException e) {
149 String message = " CIF IO Exception.";
150 logger.log(Level.WARNING, message, e);
151 return null;
152 }
153
154 if (spacegroupNum < 0 && spacegroupName != null) {
155 spacegroupNum = spaceGroupNumber(pdb2ShortName(spacegroupName));
156 }
157
158 if (spacegroupNum < 0 || resHigh < 0 || cell[0] < 0) {
159 logger.info(
160 " The CIF header contains insufficient information to generate the reflection list.");
161 return null;
162 }
163
164 if (logger.isLoggable(Level.INFO)) {
165 StringBuilder sb = new StringBuilder();
166 sb.append(format("\nOpening %s\n", cifFile.getName()));
167 sb.append(" Setting up Reflection List based on CIF:\n");
168 sb.append(format(" spacegroup #: %d (name: %s)\n",
169 spacegroupNum, SpaceGroupInfo.spaceGroupNames[spacegroupNum - 1]));
170 sb.append(format(" Resolution: %8.3f\n", 0.999999 * resHigh));
171 sb.append(format(" Cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n",
172 cell[0], cell[1], cell[2], cell[3], cell[4], cell[5]));
173 sb.append(format("\n CIF # HKL (observed): %d\n", nObs));
174 sb.append(format(" CIF # HKL (all): %d\n", nAll));
175 logger.info(sb.toString());
176 }
177
178 logger.info(format(" Space group number %d", spacegroupNum));
179 Crystal crystal = new Crystal(cell[0], cell[1], cell[2], cell[3], cell[4], cell[5],
180 spacegroupNum);
181
182 double sampling = 1.0 / 1.5;
183 if (properties != null) {
184 sampling = properties.getDouble("sampling", 1.0 / 1.5);
185 }
186 Resolution resolution = new Resolution(0.999999 * resHigh, sampling);
187 return new ReflectionList(crystal, resolution, properties);
188 }
189
190
191 @Override
192 public double getResolution(File cifFile, Crystal crystal) {
193 double resolution = Double.POSITIVE_INFINITY;
194 try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
195 String string;
196 int nCol = 0;
197 boolean inHKL = false;
198 while ((string = br.readLine()) != null) {
199 String[] strArray = string.split("\\s+");
200 if (strArray[0].startsWith("_refln.")) {
201 inHKL = true;
202 br.mark(0);
203 String[] cifArray = strArray[0].split("\\.+");
204 switch (CIFHeader.toHeader(cifArray[1])) {
205 case index_h:
206 h = nCol;
207 break;
208 case index_k:
209 k = nCol;
210 break;
211 case index_l:
212 l = nCol;
213 break;
214 }
215 nCol++;
216 } else if (inHKL) {
217 if (h < 0 || k < 0 || l < 0) {
218 String message = " Fatal error in CIF file - no H K L indexes?\n";
219 logger.log(Level.SEVERE, message);
220 return -1.0;
221 }
222 break;
223 }
224 }
225
226
227 br.reset();
228 HKL hkl = new HKL();
229 while ((string = br.readLine()) != null) {
230
231
232 if (string.trim().startsWith("#END")) {
233 break;
234 } else if (string.trim().startsWith("data")) {
235 break;
236 } else if (string.trim().startsWith("#")) {
237 continue;
238 }
239
240
241 String[] strArray = string.trim().split("\\s+");
242 while (strArray.length < nCol) {
243 string = string + " " + br.readLine();
244 strArray = string.trim().split("\\s+");
245 }
246
247 int ih = parseInt(strArray[h]);
248 int ik = parseInt(strArray[k]);
249 int il = parseInt(strArray[l]);
250
251 hkl.setH(ih);
252 hkl.setK(ik);
253 hkl.setL(il);
254 resolution = min(resolution, crystal.res(hkl));
255 }
256 } catch (IOException e) {
257 String message = " CIF IO Exception.";
258 logger.log(Level.WARNING, message, e);
259 return -1.0;
260 }
261 return resolution;
262 }
263
264
265 @Override
266 public boolean readFile(
267 File cifFile,
268 ReflectionList reflectionList,
269 DiffractionRefinementData refinementData,
270 CompositeConfiguration properties) {
271
272 int nRead, nNAN, nRes;
273 int nIgnore, nCIFIgnore;
274 int nFriedel, nCut;
275 boolean transpose = false;
276 boolean intensitiesToAmplitudes = false;
277
278 StringBuilder sb = new StringBuilder();
279 sb.append(format(" Opening %s\n", cifFile.getName()));
280 if (refinementData.rFreeFlag < 0) {
281 refinementData.setFreeRFlag(1);
282 sb.append(format(" Setting R free flag to CIF default: %d\n", refinementData.rFreeFlag));
283 }
284
285 try {
286 BufferedReader br = new BufferedReader(new FileReader(cifFile));
287
288 String string;
289 int nCol = 0;
290 boolean inHKL = false;
291 while ((string = br.readLine()) != null) {
292 String[] stringArray = string.split("\\s+");
293 if (stringArray[0].startsWith("_refln.")) {
294 inHKL = true;
295 br.mark(0);
296 String[] cifArray = stringArray[0].split("\\.+");
297 switch (CIFHeader.toHeader(cifArray[1])) {
298 case index_h:
299 h = nCol;
300 break;
301 case index_k:
302 k = nCol;
303 break;
304 case index_l:
305 l = nCol;
306 break;
307 case F_meas:
308 case F_meas_au:
309 fo = nCol;
310 break;
311 case F_meas_sigma:
312 case F_meas_sigma_au:
313 sigFo = nCol;
314 break;
315 case intensity_meas:
316 io = nCol;
317 break;
318 case intensity_sigma:
319 sigIo = nCol;
320 break;
321 case status:
322 rFree = nCol;
323 break;
324 }
325 nCol++;
326 } else if (inHKL) {
327 if (h < 0 || k < 0 || l < 0) {
328 String message = " Fatal error in CIF file - no H K L indexes?\n";
329 logger.log(Level.SEVERE, message);
330 return false;
331 }
332 break;
333 }
334 }
335
336 if (fo < 0 && sigFo < 0 && io > 0 && sigIo > 0) {
337 intensitiesToAmplitudes = true;
338 }
339
340 if (fo < 0 && io < 0) {
341 logger.severe(" Reflection data (I/F) not found in CIF file!");
342 }
343
344
345 br.reset();
346
347
348 HKL mate = new HKL();
349 int nPosIgnore = 0;
350 int nTransIgnore = 0;
351 while ((string = br.readLine()) != null) {
352
353 if (string.trim().startsWith("#END")) {
354
355 break;
356 } else if (string.trim().startsWith("data")) {
357 break;
358 } else if (string.trim().startsWith("#")) {
359 continue;
360 }
361
362
363 String[] strArray = string.trim().split("\\s+");
364 while (strArray.length < nCol) {
365 string = string + " " + br.readLine();
366 strArray = string.trim().split("\\s+");
367 }
368
369 if (rFree > 0) {
370
371 if (strArray[rFree].charAt(0) == 'x'
372 || strArray[rFree].charAt(0) == '<'
373 || strArray[rFree].charAt(0) == '-'
374 || strArray[rFree].charAt(0) == 'h'
375 || strArray[rFree].charAt(0) == 'l') {
376 continue;
377 }
378 }
379
380 int ih = parseInt(strArray[h]);
381 int ik = parseInt(strArray[k]);
382 int il = parseInt(strArray[l]);
383
384 boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, false);
385 HKL hklPos = reflectionList.getHKL(mate);
386 if (hklPos == null) {
387 nPosIgnore++;
388 }
389
390 friedel = reflectionList.findSymHKL(ih, ik, il, mate, true);
391 HKL hklTrans = reflectionList.getHKL(mate);
392 if (hklTrans == null) {
393 nTransIgnore++;
394 }
395 }
396 if (nPosIgnore > nTransIgnore) {
397 transpose = true;
398 }
399
400
401 br = new BufferedReader(new FileReader(cifFile));
402 inHKL = false;
403 while ((string = br.readLine()) != null) {
404 String[] strArray = string.split("\\s+");
405 if (strArray[0].startsWith("_refln.")) {
406 br.mark(0);
407 inHKL = true;
408 } else if (inHKL) {
409 break;
410 }
411 }
412
413
414 br.reset();
415
416
417 double[][] anofSigF = new double[refinementData.n][4];
418 for (int i = 0; i < refinementData.n; i++) {
419 anofSigF[i][0] = anofSigF[i][1] = anofSigF[i][2] = anofSigF[i][3] = Double.NaN;
420 }
421 nRead = nNAN = nRes = nIgnore = nCIFIgnore = nFriedel = nCut = 0;
422 while ((string = br.readLine()) != null) {
423
424
425 if (string.trim().startsWith("#END")) {
426 break;
427 } else if (string.trim().startsWith("data")) {
428 break;
429 } else if (string.trim().startsWith("#")) {
430 continue;
431 }
432
433
434 String[] strArray = string.trim().split("\\s+");
435 while (strArray.length < nCol) {
436 string = string + " " + br.readLine();
437 strArray = string.trim().split("\\s+");
438 }
439
440 int ih = parseInt(strArray[h]);
441 int ik = parseInt(strArray[k]);
442 int il = parseInt(strArray[l]);
443
444 boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, transpose);
445 HKL hkl = reflectionList.getHKL(mate);
446 if (hkl != null) {
447 boolean isnull = false;
448 if (rFree > 0) {
449 if (strArray[rFree].charAt(0) == 'o') {
450 refinementData.setFreeR(hkl.getIndex(), 0);
451 } else if (strArray[rFree].charAt(0) == 'f') {
452 refinementData.setFreeR(hkl.getIndex(), 1);
453 } else if (strArray[rFree].charAt(0) == 'x') {
454 isnull = true;
455 nNAN++;
456 } else if (strArray[rFree].charAt(0) == '<'
457 || strArray[rFree].charAt(0) == '-'
458 || strArray[rFree].charAt(0) == 'h'
459 || strArray[rFree].charAt(0) == 'l') {
460 isnull = true;
461 nCIFIgnore++;
462 } else {
463 refinementData.setFreeR(hkl.getIndex(), parseInt(strArray[rFree]));
464 }
465 }
466
467 if (!intensitiesToAmplitudes && !isnull) {
468 if (strArray[fo].charAt(0) == '?' || strArray[sigFo].charAt(0) == '?') {
469 nNAN++;
470 continue;
471 }
472
473 if (refinementData.fSigFCutoff > 0.0) {
474 double f1 = parseDouble(strArray[fo]);
475 double sigf1 = parseDouble(strArray[sigFo]);
476 if ((f1 / sigf1) < refinementData.fSigFCutoff) {
477 nCut++;
478 continue;
479 }
480 }
481
482 if (friedel) {
483 anofSigF[hkl.getIndex()][2] = parseDouble(strArray[fo]);
484 anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigFo]);
485 nFriedel++;
486 } else {
487 anofSigF[hkl.getIndex()][0] = parseDouble(strArray[fo]);
488 anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigFo]);
489 }
490 }
491
492 if (intensitiesToAmplitudes && !isnull) {
493 if (strArray[io].charAt(0) == '?' || strArray[sigIo].charAt(0) == '?') {
494 nNAN++;
495 continue;
496 }
497
498 if (friedel) {
499 anofSigF[hkl.getIndex()][2] = parseDouble(strArray[io]);
500 anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigIo]);
501 nFriedel++;
502 } else {
503 anofSigF[hkl.getIndex()][0] = parseDouble(strArray[io]);
504 anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigIo]);
505 }
506 }
507
508 nRead++;
509 } else {
510 HKL tmp = new HKL(ih, ik, il);
511 if (!reflectionList.resolution.inInverseResSqRange(
512 reflectionList.crystal.invressq(tmp))) {
513 nRes++;
514 } else {
515 nIgnore++;
516 }
517 }
518 }
519 br.close();
520
521
522 refinementData.generateFsigFfromAnomalousFsigF(anofSigF);
523 if (intensitiesToAmplitudes) {
524 refinementData.intensitiesToAmplitudes();
525 }
526 } catch (IOException ioe) {
527 System.out.println(" IO Exception: " + ioe.getMessage());
528 return false;
529 }
530
531 sb.append(format(" HKL data is %s\n", transpose ? "transposed" : "not transposed"));
532 sb.append(format(" HKL read in: %d\n", nRead));
533 sb.append(format(" HKL read as Friedel mates: %d\n", nFriedel));
534 sb.append(format(" HKL with NaN (ignored): %d\n", nNAN));
535 sb.append(format(" HKL NOT read in (status <, -, h or l): %d\n", nCIFIgnore));
536 sb.append(format(" HKL NOT read in (too high resolution): %d\n", nRes));
537 sb.append(format(" HKL NOT read in (not in internal list?): %d\n", nIgnore));
538 sb.append(format(" HKL NOT read in (F/sigF cutoff): %d\n", nCut));
539 sb.append(
540 format(" HKL in internal list: %d\n", reflectionList.hklList.size()));
541
542 if (logger.isLoggable(Level.INFO)) {
543 logger.info(sb.toString());
544 }
545
546 boolean doRFree = properties.getBoolean("generate-rfree", false);
547 if (doRFree) {
548 logger.info(" Generating R free flags");
549 refinementData.generateRFree();
550 }
551 return true;
552 }
553
554 private enum CIFHeader {
555 d_resolution_high,
556 number_all,
557 number_obs,
558 length_a,
559 length_b,
560 length_c,
561 angle_alpha,
562 angle_beta,
563 angle_gamma,
564 Int_Tables_number,
565 space_group_name_H_M,
566 crystal_id,
567 wavelength_id,
568 scale_group_code,
569 status,
570 index_h,
571 index_k,
572 index_l,
573 F_meas,
574 F_meas_au,
575 F_meas_sigma,
576 F_meas_sigma_au,
577 intensity_meas,
578 intensity_sigma,
579 NOVALUE;
580
581 public static CIFHeader toHeader(String str) {
582 try {
583 return valueOf(str.replace('-', '_'));
584 } catch (Exception ex) {
585 return NOVALUE;
586 }
587 }
588 }
589 }