1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.potential.commands;
39
40 import ffx.potential.cli.PotentialCommand;
41 import ffx.utilities.FFXBinding;
42 import org.biojava.nbio.core.sequence.ProteinSequence;
43 import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
44 import org.biojava.nbio.core.sequence.io.FastaWriterHelper;
45 import picocli.CommandLine.Command;
46 import picocli.CommandLine.Option;
47 import picocli.CommandLine.Parameters;
48
49 import java.io.File;
50 import java.util.ArrayList;
51 import java.util.Collection;
52
53 import static java.lang.String.format;
54 import static org.apache.commons.io.FilenameUtils.getName;
55
56
57
58
59
60
61
62 @Command(name = "Fasta", description = " Fasta outputs a sub-sequence from a FASTA file.")
63 public class Fasta extends PotentialCommand {
64
65
66 @Option(names = {"-f", "--firstResidue"}, paramLabel = "1", defaultValue = "1",
67 description = "Define the first Fasta residue to keep (index of the first residue is 1).")
68 private int firstResidue = 1;
69
70
71 @Option(names = {"-l", "--lastResidue"}, paramLabel = "-1", defaultValue = "-1",
72 description = "Define the last Fasta residue to keep (index of the last residue is n).")
73 private int lastResidue = -1;
74
75
76 @Parameters(arity = "1", paramLabel = "file",
77 description = "A file in FASTA format.")
78 private String fastaName = null;
79
80 private ProteinSequence proteinSequence;
81
82 public Fasta() {
83 super();
84 }
85
86 public Fasta(FFXBinding binding) {
87 super(binding);
88 }
89
90 public Fasta(String[] args) {
91 super(args);
92 }
93
94 @Override
95 public Fasta run() {
96 if (!init()) {
97 return this;
98 }
99
100 if (fastaName == null) {
101 logger.info(helpString());
102 return this;
103 }
104
105 logger.info("\n Opening FASTA " + fastaName);
106
107 try {
108 java.util.Map<String, ProteinSequence> fastaData =
109 FastaReaderHelper.readFastaProteinSequence(new File(fastaName));
110 if (fastaData == null || fastaData.isEmpty()) {
111 logger.warning(" No sequences found in FASTA file: " + fastaName);
112 return this;
113 }
114 ProteinSequence sequence = fastaData.values().iterator().next();
115 String seq = sequence.getSequenceAsString();
116 int length = seq.length();
117 logger.info(format("\n %s of length: %d\n %s", sequence.getOriginalHeader(), length, seq));
118
119 if (firstResidue < 1 || firstResidue > length) {
120 firstResidue = 1;
121 }
122 if (lastResidue < firstResidue || lastResidue > length) {
123 lastResidue = length;
124 }
125
126 proteinSequence = new ProteinSequence(seq.substring(firstResidue - 1, lastResidue));
127 proteinSequence.setOriginalHeader(sequence.getOriginalHeader());
128 length = proteinSequence.getLength();
129 logger.info(format("\n New sequence from residue %d to residue %d is of length %d: \n %s",
130 firstResidue, lastResidue, length, proteinSequence.toString()));
131
132 Collection<ProteinSequence> proteinSequenceCollection = new ArrayList<>();
133 proteinSequenceCollection.add(proteinSequence);
134
135
136 String dirString = getBaseDirString(fastaName);
137 File saveFile = potentialFunctions.versionFile(new File(dirString + getName(fastaName)));
138
139 logger.info(format("\n Saving new Fasta file to: %s", saveFile.getAbsolutePath()));
140 FastaWriterHelper.writeProteinSequence(saveFile, proteinSequenceCollection);
141 } catch (Exception e) {
142 logger.warning(" Exception processing FASTA file: " + e.getMessage());
143 }
144
145 return this;
146 }
147 }