View Javadoc
1   //******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  //******************************************************************************
38  package ffx.algorithms.commands;
39  
40  import edu.rit.pj.Comm;
41  import edu.rit.pj.IntegerSchedule;
42  import edu.rit.pj.WorkerIntegerForLoop;
43  import edu.rit.pj.WorkerRegion;
44  import edu.rit.pj.WorkerTeam;
45  import ffx.algorithms.cli.AlgorithmsCommand;
46  import ffx.numerics.Potential;
47  import ffx.potential.Utilities;
48  import ffx.utilities.FFXCommand;
49  import ffx.utilities.FFXBinding;
50  import ffx.utilities.FileUtils;
51  import picocli.CommandLine.Command;
52  import picocli.CommandLine.Option;
53  import picocli.CommandLine.Unmatched;
54  
55  import java.io.File;
56  import java.util.ArrayList;
57  import java.util.Collections;
58  import java.util.List;
59  
60  import static java.lang.String.format;
61  import static org.apache.commons.io.FilenameUtils.normalize;
62  
/**
 * Run an FFX command on a series of files. Parallel Java across nodes is supported.
 * <p>
 * Recursion through the directory structure is supported to a supplied level using the
 * --recurse flag (0 only includes the current directory).
 * <p>
 * Files can be selected using a regular expression -- the default matches all
 * files (".*").
 * <p>
 * To control placement of the variable file on the command line, the "FILE" string can be used. If
 * present, it is replaced with the current file. If absent, the current file is appended as the
 * last argument to the FFX command.
 * <p>
 * When a second regular expression is supplied, the "FILE2" string marks where the second file is
 * placed. If absent, the second file is appended after the first.
 *
 * <br>
 * Usage:
 * <br>
 * ffxc ForEachFile [ForEachFile options] Command [Command options] &lt;FILE&gt; [&lt;FILE2&gt;]
 */
81  @Command(description = " Run an FFX command on a series of files.", name = "ForEachFile")
82  public class ForEachFile extends AlgorithmsCommand {
83  
84    /**
85     * --recurse Maximum recursion level (0 only includes the current directory).
86     */
87    @Option(names = {"--recurse"}, defaultValue = "0", paramLabel = "0",
88        description = "Maximum recursion level (0 only includes the current directory).")
89    private int recurse;
90  
91    /**
92     * --regex --fileSelectionRegex Evaluate files that match a Regular expression (.* includes all files).
93     */
94    @Option(names = {"--regex", "--fileSelectionRegex"}, paramLabel = ".*", defaultValue = ".*",
95        description = "Locate files that match a regular expression ('.*' matches all files).")
96    private String regex;
97  
98    /**
99     * --regex2 --fileSelectionRegex2 Evaluate files that match a Regular expression (.* includes all files).
100    * The second regular expression can be used to place a second file on each command line (e.g., for dual-topology simulations).
101    */
102   @Option(names = {"--regex2", "--fileSelectionRegex2"}, paramLabel = "", defaultValue = "",
103       description = "Locate files that match a 2nd regular expression ('.*' matches all files).")
104   private String regex2;
105 
106   /**
107    * --schedule Load balancing will use a [Dynamic, Fixed, or Guided] schedule.
108    */
109   @Option(names = {"--schedule"}, defaultValue = "dynamic", paramLabel = "dynamic",
110       description = "Load balancing will use a [Dynamic, Fixed, or Guided] schedule.")
111   private String schedule;
112 
113   /**
114    * -v --verbose Decide whether to print additional logging.
115    */
116   @Option(names = {"-v", "--verbose"}, defaultValue = "false", paramLabel = "false",
117       description = "Print additional logging for errors.")
118   private boolean verbose;
119 
120   /**
121    * The final argument(s) should be one or more filenames.
122    */
123   @Unmatched
124   private List<String> unmatched = null;
125 
126   /**
127    * FFX Script to run in each process.
128    */
129   private Class<? extends FFXCommand> script;
130 
131   /**
132    * List of files.
133    */
134   private List<File> files;
135 
136   /**
137    * Place two files on each command line.
138    */
139   private boolean twoFilesPerCommand = false;
140 
141   /**
142    * List of second files to place on the command line.
143    */
144   private List<File> files2;
145 
146   /**
147    * Parallel Java Schedule.
148    */
149   private IntegerSchedule integerSchedule;
150 
151   /**
152    * ForEachFile Constructor.
153    */
154   public ForEachFile() {
155     super();
156   }
157 
158   /**
159    * ForEachFile Constructor.
160    *
161    * @param binding The Binding to use.
162    */
163   public ForEachFile(FFXBinding binding) {
164     super(binding);
165   }
166 
167   /**
168    * ForEachFile constructor that sets the command line arguments.
169    *
170    * @param args Command line arguments.
171    */
172   public ForEachFile(String[] args) {
173     super(args);
174   }
175 
176   /**
177    * {@inheritDoc}
178    */
179   @Override
180   public ForEachFile run() {
181 
182     if (!init()) {
183       return this;
184     }
185 
186     // Set a flag to avoid double use of MPI in downstream commands.
187     System.setProperty("pj.use.mpi", "false");
188 
189     script = getCommand(unmatched.get(0));
190     if (script != null) {
191       logger.info(format(" The %s will be run on each file.", script));
192     } else {
193       logger.info(format(" %s was not recognized.", unmatched.get(0)));
194       return this;
195     }
196 
197     Comm world = Comm.world();
198     int numProc = world.size();
199     int rank = world.rank();
200 
201     if (numProc > 1) {
202       logger.info(format(" Number of processes:  %d", numProc));
203       logger.info(format(" Rank of this process: %d", rank));
204     }
205 
206     // Remove the ForEachFile command.
207     unmatched.remove(0);
208 
209     // Collect the files.
210     File cwd = new File(".");
211     files = FileUtils.traverseFiles(cwd, recurse, regex);
212 
213     if (!regex2.isEmpty()) {
214       twoFilesPerCommand = true;
215       files2 = FileUtils.traverseFiles(cwd, recurse, regex2);
216     }
217 
218     // Sort the files.
219     Collections.sort(files);
220     if (twoFilesPerCommand) {
221       if (files.size() != files2.size()) {
222         logger.info(" The number of files matched by the two regular expressions do not agree.");
223         logger.info(" The number of files matched by the first regular expression: " + files.size());
224         for (File file : files) {
225           logger.info("  File: " + file.getAbsolutePath());
226         }
227         logger.info(" The number of files matched by the second regular expression: " + files2.size());
228         for (File file : files2) {
229           logger.info("  File: " + file.getAbsolutePath());
230         }
231         return this;
232       }
233       Collections.sort(files2);
234     }
235 
236     // Create the Parallel Java execution Schedule.
237     try {
238       integerSchedule = IntegerSchedule.parse(schedule.toLowerCase());
239       logger.info(" Parallel Schedule: " + schedule);
240     } catch (Exception e) {
241       integerSchedule = IntegerSchedule.dynamic();
242       logger.info(" Parallel Schedule: Dynamic");
243     }
244 
245     // Create a WorkerTeam and then execute the ForEachFileRegion
246     WorkerTeam workerTeam = new WorkerTeam(world);
247     try {
248       workerTeam.execute(new ForEachFileRegion());
249     } catch (Exception e) {
250       logger.severe("Error executing ForEachFileRegion: " + e.getMessage());
251     }
252 
253     // Clear the pj.use.mpi flag.
254     System.clearProperty("pj.use.mpi");
255 
256     return this;
257   }
258 
259   /**
260    * ForEachFileRegion delegates work to ForEachFileLoop instances in Parallel Java processes.
261    */
262   private class ForEachFileRegion extends WorkerRegion {
263 
264     @Override
265     public void run() throws Exception {
266       int numFiles = files.size();
267       execute(0, numFiles - 1, new ForEachFileLoop());
268     }
269 
270   }
271 
272   /**
273    * ForEachFileLoop is executed in a Parallel Java process.
274    */
275   private class ForEachFileLoop extends WorkerIntegerForLoop {
276 
277     @Override
278     public IntegerSchedule schedule() {
279       return integerSchedule;
280     }
281 
282     @Override
283     public void run(int lb, int ub) throws Exception {
284       for (int i = lb; i <= ub; i++) {
285         File file = files.get(i);
286         if (!file.exists()) {
287           logger.info(format(" Ignoring file that does not exist: %s", file.getAbsolutePath()));
288           continue;
289         }
290         File dualTopologyFile = null;
291         if (twoFilesPerCommand) {
292           dualTopologyFile = files2.get(i);
293           if (!dualTopologyFile.exists()) {
294             logger.info(format(" Ignoring dual topology file that does not exist: %s", dualTopologyFile.getAbsolutePath()));
295             continue;
296           }
297         }
298 
299         String path = normalize(file.getAbsolutePath());
300         logger.info(format(" Current File: %s", path));
301 
302         String dualTopologyPath = null;
303         if (twoFilesPerCommand) {
304           dualTopologyPath = normalize(dualTopologyFile.getAbsolutePath());
305           logger.info(format(" Current Dual Topology File: %s", dualTopologyPath));
306         }
307 
308         List<String> commandArgs = new ArrayList<>();
309         // Pass along the unmatched parameters
310         boolean foundFile = false;
311         boolean found2File = false;
312         for (String arg : unmatched) {
313           if (arg.equalsIgnoreCase("FILE")) {
314             // Replace FILE with the current file path.
315             commandArgs.add(path);
316             foundFile = true;
317           } else if (twoFilesPerCommand && arg.equalsIgnoreCase("FILE2")) {
318             commandArgs.add(dualTopologyPath);
319             found2File = true;
320           } else {
321             commandArgs.add(arg);
322           }
323         }
324 
325         if (!foundFile) {
326           // Add the current file as the final argument.
327           commandArgs.add(path);
328         }
329         if (twoFilesPerCommand && !found2File) {
330           // Add the dual topology file as the final argument to a dual topology simulation.
331           commandArgs.add(dualTopologyPath);
332         }
333 
334         // Create a Binding for command line arguments.
335         FFXBinding binding = new FFXBinding();
336         binding.setVariable("args", commandArgs);
337 
338         // Create a new instance of the script and run it.
339         FFXCommand command = script.getDeclaredConstructor().newInstance();
340         command.setBinding(binding);
341 
342         try {
343           command.run();
344         } catch (Exception e) {
345           logger.info(format(" Exception for file: %s", path));
346           if (twoFilesPerCommand) {
347             logger.info(format(" Dual topology file: %s", dualTopologyPath));
348           }
349           if (verbose) {
350             logger.info(e.toString());
351             logger.info(Utilities.stackTraceToString(e));
352           }
353         }
354       }
355     }
356   }
357 
358   @Override
359   public List<Potential> getPotentials() {
360     return Collections.emptyList();
361   }
362 
363 }