1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.xray.parsers;
39
40 import ffx.crystal.Crystal;
41 import ffx.crystal.HKL;
42 import ffx.crystal.ReflectionList;
43 import ffx.crystal.Resolution;
44 import ffx.crystal.SpaceGroupInfo;
45 import ffx.xray.DiffractionRefinementData;
46 import org.apache.commons.configuration2.CompositeConfiguration;
47
48 import java.io.BufferedReader;
49 import java.io.File;
50 import java.io.FileReader;
51 import java.io.IOException;
52 import java.util.logging.Level;
53 import java.util.logging.Logger;
54
55 import static ffx.crystal.SpaceGroupDefinitions.spaceGroupNumber;
56 import static ffx.crystal.SpaceGroupInfo.pdb2ShortName;
57 import static java.lang.Double.parseDouble;
58 import static java.lang.Integer.parseInt;
59 import static java.lang.String.format;
60 import static org.apache.commons.math3.util.FastMath.min;
61
62
63
64
65
66
67
68 public class CIFFilter implements DiffractionFileFilter {
69
70 private static final Logger logger = Logger.getLogger(CIFFilter.class.getName());
71 private final double[] cell = {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0};
72 private double resHigh = -1.0;
73 private String spacegroupName = null;
74 private int spacegroupNum = -1;
75 private int h = -1, k = -1, l = -1;
76 private int fo = -1, sigFo = -1;
77 private int io = -1, sigIo = -1;
78 private int rFree = -1;
79 private int nAll, nObs;
80
81
82 public CIFFilter() {
83 }
84
85
86 @Override
87 public ReflectionList getReflectionList(File cifFile) {
88 return getReflectionList(cifFile, null);
89 }
90
91
92 @Override
93 public ReflectionList getReflectionList(File cifFile, CompositeConfiguration properties) {
94 try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
95 String string;
96 while ((string = br.readLine()) != null) {
97
98
99 if (string.startsWith("_refln.")) {
100 break;
101 }
102 String[] strArray = string.split("\\s+");
103 if (strArray[0].startsWith("_reflns")
104 || strArray[0].startsWith("_cell")
105 || strArray[0].startsWith("_symmetry")) {
106 String[] cifArray = strArray[0].split("\\.+");
107 switch (CIFHeader.toHeader(cifArray[1])) {
108 case d_resolution_high:
109 resHigh = parseDouble(strArray[1]);
110 break;
111 case number_all:
112 nAll = parseInt(strArray[1]);
113 break;
114 case number_obs:
115 nObs = parseInt(strArray[1]);
116 break;
117 case length_a:
118 cell[0] = parseDouble(strArray[1]);
119 break;
120 case length_b:
121 cell[1] = parseDouble(strArray[1]);
122 break;
123 case length_c:
124 cell[2] = parseDouble(strArray[1]);
125 break;
126 case angle_alpha:
127 cell[3] = parseDouble(strArray[1]);
128 break;
129 case angle_beta:
130 cell[4] = parseDouble(strArray[1]);
131 break;
132 case angle_gamma:
133 cell[5] = parseDouble(strArray[1]);
134 break;
135 case Int_Tables_number:
136 spacegroupNum = parseInt(strArray[1]);
137 break;
138 case space_group_name_H_M:
139 String[] spacegroupNameArray = string.split("'+");
140 if (spacegroupNameArray.length > 1) {
141 spacegroupName = spacegroupNameArray[1];
142 } else if (strArray.length > 1) {
143 spacegroupName = strArray[1];
144 }
145 break;
146 }
147 }
148 }
149 } catch (IOException e) {
150 String message = " CIF IO Exception.";
151 logger.log(Level.WARNING, message, e);
152 return null;
153 }
154
155 if (spacegroupNum < 0 && spacegroupName != null) {
156 spacegroupNum = spaceGroupNumber(pdb2ShortName(spacegroupName));
157 }
158
159 if (spacegroupNum < 0 || resHigh < 0 || cell[0] < 0) {
160 logger.info(
161 " The CIF header contains insufficient information to generate the reflection list.");
162 return null;
163 }
164
165 if (logger.isLoggable(Level.INFO)) {
166 StringBuilder sb = new StringBuilder();
167 sb.append(format("\nOpening %s\n", cifFile.getName()));
168 sb.append(" Setting up Reflection List based on CIF:\n");
169 sb.append(format(" spacegroup #: %d (name: %s)\n",
170 spacegroupNum, SpaceGroupInfo.spaceGroupNames[spacegroupNum - 1]));
171 sb.append(format(" Resolution: %8.3f\n", 0.999999 * resHigh));
172 sb.append(format(" Cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n",
173 cell[0], cell[1], cell[2], cell[3], cell[4], cell[5]));
174 sb.append(format("\n CIF # HKL (observed): %d\n", nObs));
175 sb.append(format(" CIF # HKL (all): %d\n", nAll));
176 logger.info(sb.toString());
177 }
178
179 logger.info(format(" Space group number %d", spacegroupNum));
180 Crystal crystal = new Crystal(cell[0], cell[1], cell[2], cell[3], cell[4], cell[5],
181 spacegroupNum);
182
183 double sampling = 1.0 / 1.5;
184 if (properties != null) {
185 sampling = properties.getDouble("sampling", 1.0 / 1.5);
186 }
187 Resolution resolution = new Resolution(0.999999 * resHigh, sampling);
188 return new ReflectionList(crystal, resolution, properties);
189 }
190
191
192 @Override
193 public double getResolution(File cifFile, Crystal crystal) {
194 double resolution = Double.POSITIVE_INFINITY;
195 try (BufferedReader br = new BufferedReader(new FileReader(cifFile))) {
196 String string;
197 int nCol = 0;
198 boolean inHKL = false;
199 while ((string = br.readLine()) != null) {
200 String[] strArray = string.split("\\s+");
201 if (strArray[0].startsWith("_refln.")) {
202 inHKL = true;
203 br.mark(0);
204 String[] cifArray = strArray[0].split("\\.+");
205 switch (CIFHeader.toHeader(cifArray[1])) {
206 case index_h:
207 h = nCol;
208 break;
209 case index_k:
210 k = nCol;
211 break;
212 case index_l:
213 l = nCol;
214 break;
215 }
216 nCol++;
217 } else if (inHKL) {
218 if (h < 0 || k < 0 || l < 0) {
219 String message = " Fatal error in CIF file - no H K L indexes?\n";
220 logger.log(Level.SEVERE, message);
221 return -1.0;
222 }
223 break;
224 }
225 }
226
227
228 br.reset();
229 HKL hkl = new HKL();
230 while ((string = br.readLine()) != null) {
231
232
233 if (string.trim().startsWith("#END")) {
234 break;
235 } else if (string.trim().startsWith("data")) {
236 break;
237 } else if (string.trim().startsWith("#")) {
238 continue;
239 }
240
241
242 String[] strArray = string.trim().split("\\s+");
243 while (strArray.length < nCol) {
244 string = string + " " + br.readLine();
245 strArray = string.trim().split("\\s+");
246 }
247
248 int ih = parseInt(strArray[h]);
249 int ik = parseInt(strArray[k]);
250 int il = parseInt(strArray[l]);
251
252 hkl.setH(ih);
253 hkl.setK(ik);
254 hkl.setL(il);
255 resolution = min(resolution, crystal.res(hkl));
256 }
257 } catch (IOException e) {
258 String message = " CIF IO Exception.";
259 logger.log(Level.WARNING, message, e);
260 return -1.0;
261 }
262 return resolution;
263 }
264
265
266 @Override
267 public boolean readFile(
268 File cifFile,
269 ReflectionList reflectionList,
270 DiffractionRefinementData refinementData,
271 CompositeConfiguration properties) {
272
273 int nRead, nNAN, nRes;
274 int nIgnore, nCIFIgnore;
275 int nFriedel, nCut;
276 boolean transpose = false;
277 boolean intensitiesToAmplitudes = false;
278
279 StringBuilder sb = new StringBuilder();
280 sb.append(format(" Opening %s\n", cifFile.getName()));
281 if (refinementData.rFreeFlag < 0) {
282 refinementData.setFreeRFlag(1);
283 sb.append(format(" Setting R free flag to CIF default: %d\n", refinementData.rFreeFlag));
284 }
285
286 try {
287 BufferedReader br = new BufferedReader(new FileReader(cifFile));
288
289 String string;
290 int nCol = 0;
291 boolean inHKL = false;
292 while ((string = br.readLine()) != null) {
293 String[] stringArray = string.split("\\s+");
294 if (stringArray[0].startsWith("_refln.")) {
295 inHKL = true;
296 br.mark(0);
297 String[] cifArray = stringArray[0].split("\\.+");
298 switch (CIFHeader.toHeader(cifArray[1])) {
299 case index_h:
300 h = nCol;
301 break;
302 case index_k:
303 k = nCol;
304 break;
305 case index_l:
306 l = nCol;
307 break;
308 case F_meas:
309 case F_meas_au:
310 fo = nCol;
311 break;
312 case F_meas_sigma:
313 case F_meas_sigma_au:
314 sigFo = nCol;
315 break;
316 case intensity_meas:
317 io = nCol;
318 break;
319 case intensity_sigma:
320 sigIo = nCol;
321 break;
322 case status:
323 rFree = nCol;
324 break;
325 }
326 nCol++;
327 } else if (inHKL) {
328 if (h < 0 || k < 0 || l < 0) {
329 String message = " Fatal error in CIF file - no H K L indexes?\n";
330 logger.log(Level.SEVERE, message);
331 return false;
332 }
333 break;
334 }
335 }
336
337 if (fo < 0 && sigFo < 0 && io > 0 && sigIo > 0) {
338 intensitiesToAmplitudes = true;
339 }
340
341 if (fo < 0 && io < 0) {
342 logger.severe(" Reflection data (I/F) not found in CIF file!");
343 }
344
345
346 br.reset();
347
348
349 HKL mate = new HKL();
350 int nPosIgnore = 0;
351 int nTransIgnore = 0;
352 while ((string = br.readLine()) != null) {
353
354 if (string.trim().startsWith("#END")) {
355
356 break;
357 } else if (string.trim().startsWith("data")) {
358 break;
359 } else if (string.trim().startsWith("#")) {
360 continue;
361 }
362
363
364 String[] strArray = string.trim().split("\\s+");
365 while (strArray.length < nCol) {
366 string = string + " " + br.readLine();
367 strArray = string.trim().split("\\s+");
368 }
369
370 if (rFree > 0) {
371
372 if (strArray[rFree].charAt(0) == 'x'
373 || strArray[rFree].charAt(0) == '<'
374 || strArray[rFree].charAt(0) == '-'
375 || strArray[rFree].charAt(0) == 'h'
376 || strArray[rFree].charAt(0) == 'l') {
377 continue;
378 }
379 }
380
381 int ih = parseInt(strArray[h]);
382 int ik = parseInt(strArray[k]);
383 int il = parseInt(strArray[l]);
384
385 boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, false);
386 HKL hklPos = reflectionList.getHKL(mate);
387 if (hklPos == null) {
388 nPosIgnore++;
389 }
390
391 friedel = reflectionList.findSymHKL(ih, ik, il, mate, true);
392 HKL hklTrans = reflectionList.getHKL(mate);
393 if (hklTrans == null) {
394 nTransIgnore++;
395 }
396 }
397 if (nPosIgnore > nTransIgnore) {
398 transpose = true;
399 }
400
401
402 br = new BufferedReader(new FileReader(cifFile));
403 inHKL = false;
404 while ((string = br.readLine()) != null) {
405 String[] strArray = string.split("\\s+");
406 if (strArray[0].startsWith("_refln.")) {
407 br.mark(0);
408 inHKL = true;
409 } else if (inHKL) {
410 break;
411 }
412 }
413
414
415 br.reset();
416
417
418 double[][] anofSigF = new double[refinementData.n][4];
419 for (int i = 0; i < refinementData.n; i++) {
420 anofSigF[i][0] = anofSigF[i][1] = anofSigF[i][2] = anofSigF[i][3] = Double.NaN;
421 }
422 nRead = nNAN = nRes = nIgnore = nCIFIgnore = nFriedel = nCut = 0;
423 while ((string = br.readLine()) != null) {
424
425
426 if (string.trim().startsWith("#END")) {
427 break;
428 } else if (string.trim().startsWith("data")) {
429 break;
430 } else if (string.trim().startsWith("#")) {
431 continue;
432 }
433
434
435 String[] strArray = string.trim().split("\\s+");
436 while (strArray.length < nCol) {
437 string = string + " " + br.readLine();
438 strArray = string.trim().split("\\s+");
439 }
440
441 int ih = parseInt(strArray[h]);
442 int ik = parseInt(strArray[k]);
443 int il = parseInt(strArray[l]);
444
445 boolean friedel = reflectionList.findSymHKL(ih, ik, il, mate, transpose);
446 HKL hkl = reflectionList.getHKL(mate);
447 if (hkl != null) {
448 boolean isnull = false;
449 if (rFree > 0) {
450 if (strArray[rFree].charAt(0) == 'o') {
451 refinementData.setFreeR(hkl.getIndex(), 0);
452 } else if (strArray[rFree].charAt(0) == 'f') {
453 refinementData.setFreeR(hkl.getIndex(), 1);
454 } else if (strArray[rFree].charAt(0) == 'x') {
455 isnull = true;
456 nNAN++;
457 } else if (strArray[rFree].charAt(0) == '<'
458 || strArray[rFree].charAt(0) == '-'
459 || strArray[rFree].charAt(0) == 'h'
460 || strArray[rFree].charAt(0) == 'l') {
461 isnull = true;
462 nCIFIgnore++;
463 } else {
464 refinementData.setFreeR(hkl.getIndex(), parseInt(strArray[rFree]));
465 }
466 }
467
468 if (!intensitiesToAmplitudes && !isnull) {
469 if (strArray[fo].charAt(0) == '?' || strArray[sigFo].charAt(0) == '?') {
470 nNAN++;
471 continue;
472 }
473
474 if (refinementData.fSigFCutoff > 0.0) {
475 double f1 = parseDouble(strArray[fo]);
476 double sigf1 = parseDouble(strArray[sigFo]);
477 if ((f1 / sigf1) < refinementData.fSigFCutoff) {
478 nCut++;
479 continue;
480 }
481 }
482
483 if (friedel) {
484 anofSigF[hkl.getIndex()][2] = parseDouble(strArray[fo]);
485 anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigFo]);
486 nFriedel++;
487 } else {
488 anofSigF[hkl.getIndex()][0] = parseDouble(strArray[fo]);
489 anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigFo]);
490 }
491 }
492
493 if (intensitiesToAmplitudes && !isnull) {
494 if (strArray[io].charAt(0) == '?' || strArray[sigIo].charAt(0) == '?') {
495 nNAN++;
496 continue;
497 }
498
499 if (friedel) {
500 anofSigF[hkl.getIndex()][2] = parseDouble(strArray[io]);
501 anofSigF[hkl.getIndex()][3] = parseDouble(strArray[sigIo]);
502 nFriedel++;
503 } else {
504 anofSigF[hkl.getIndex()][0] = parseDouble(strArray[io]);
505 anofSigF[hkl.getIndex()][1] = parseDouble(strArray[sigIo]);
506 }
507 }
508
509 nRead++;
510 } else {
511 HKL tmp = new HKL(ih, ik, il);
512 if (!reflectionList.resolution.inInverseResSqRange(
513 reflectionList.crystal.invressq(tmp))) {
514 nRes++;
515 } else {
516 nIgnore++;
517 }
518 }
519 }
520 br.close();
521
522
523 refinementData.generateFsigFfromAnomalousFsigF(anofSigF);
524 if (intensitiesToAmplitudes) {
525 refinementData.intensitiesToAmplitudes();
526 }
527 } catch (IOException ioe) {
528 System.out.println(" IO Exception: " + ioe.getMessage());
529 return false;
530 }
531
532 sb.append(format(" HKL data is %s\n", transpose ? "transposed" : "not transposed"));
533 sb.append(format(" HKL read in: %d\n", nRead));
534 sb.append(format(" HKL read as Friedel mates: %d\n", nFriedel));
535 sb.append(format(" HKL with NaN (ignored): %d\n", nNAN));
536 sb.append(format(" HKL NOT read in (status <, -, h or l): %d\n", nCIFIgnore));
537 sb.append(format(" HKL NOT read in (too high resolution): %d\n", nRes));
538 sb.append(format(" HKL NOT read in (not in internal list?): %d\n", nIgnore));
539 sb.append(format(" HKL NOT read in (F/sigF cutoff): %d\n", nCut));
540 sb.append(
541 format(" HKL in internal list: %d\n", reflectionList.hklList.size()));
542
543 if (logger.isLoggable(Level.INFO)) {
544 logger.info(sb.toString());
545 }
546
547 boolean doRFree = properties.getBoolean("generate-rfree", false);
548 if (doRFree) {
549 logger.info(" Generating R free flags");
550 refinementData.generateRFree();
551 }
552 return true;
553 }
554
555 private enum CIFHeader {
556 d_resolution_high,
557 number_all,
558 number_obs,
559 length_a,
560 length_b,
561 length_c,
562 angle_alpha,
563 angle_beta,
564 angle_gamma,
565 Int_Tables_number,
566 space_group_name_H_M,
567 crystal_id,
568 wavelength_id,
569 scale_group_code,
570 status,
571 index_h,
572 index_k,
573 index_l,
574 F_meas,
575 F_meas_au,
576 F_meas_sigma,
577 F_meas_sigma_au,
578 intensity_meas,
579 intensity_sigma,
580 NOVALUE;
581
582 public static CIFHeader toHeader(String str) {
583 try {
584 return valueOf(str.replace('-', '_'));
585 } catch (Exception ex) {
586 return NOVALUE;
587 }
588 }
589 }
590 }