1 //******************************************************************************
2 //
3 // File: Configuration.java
4 // Package: edu.rit.pj.cluster
5 // Unit: Class edu.rit.pj.cluster.Configuration
6 //
7 // This Java source file is copyright (C) 2012 by Alan Kaminsky. All rights
8 // reserved. For further information, contact the author, Alan Kaminsky, at
9 // ark@cs.rit.edu.
10 //
11 // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
12 // software; you can redistribute it and/or modify it under the terms of the GNU
13 // General Public License as published by the Free Software Foundation; either
14 // version 3 of the License, or (at your option) any later version.
15 //
16 // PJ is distributed in the hope that it will be useful, but WITHOUT ANY
17 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18 // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 //
20 // Linking this library statically or dynamically with other modules is making a
21 // combined work based on this library. Thus, the terms and conditions of the GNU
22 // General Public License cover the whole combination.
23 //
24 // As a special exception, the copyright holders of this library give you
25 // permission to link this library with independent modules to produce an
26 // executable, regardless of the license terms of these independent modules, and
27 // to copy and distribute the resulting executable under terms of your choice,
28 // provided that you also meet, for each linked independent module, the terms
29 // and conditions of the license of that module. An independent module is a module
30 // which is not derived from or based on this library. If you modify this library,
31 // you may extend this exception to your version of the library, but you are not
32 // obligated to do so. If you do not wish to do so, delete this exception
33 // statement from your version.
34 //
35 // A copy of the GNU General Public License is provided in the file gpl.txt. You
36 // may also obtain a copy of the GNU General Public License on the World Wide
37 // Web at http://www.gnu.org/licenses/gpl.html.
38 //
39 //******************************************************************************
40 package edu.rit.pj.cluster;
41
42 import java.io.File;
43 import java.io.IOException;
44 import java.util.ArrayList;
45 import java.util.List;
46 import java.util.NoSuchElementException;
47 import java.util.Scanner;
48
49 /**
50 * Class Configuration provides configuration information about a parallel
51 * computer running Parallel Java. The configuration information is read from a
52 * plain text file. Each configuration file entry is on a single line. Lines
53 * beginning with <code>#</code> and blank lines are ignored. The order of the
54 * entries in the file does not matter (unless stated otherwise below). The
55 * items in each entry are separated by whitespace; there cannot be any
56 * whitespace within an item (unless stated otherwise below). The configuration
57 * file entries are:
58 * <UL>
59 *
60 * <LI>
 * <code>cluster &lt;name&gt;</code>
 * <BR>The name of the cluster is <code>&lt;name&gt;</code>. The name may contain
 * whitespace. This entry must be specified; there is no default.
64 *
65 * <LI>
 * <code>logfile &lt;file&gt;</code>
 * <BR>The Job Scheduler will append log entries to the log file named
 * <code>&lt;file&gt;</code>. This entry must be specified; there is no default.
69 *
70 * <LI>
 * <code>webhost &lt;host&gt;</code>
 * <BR>The host name for the Job Scheduler's web interface is
 * <code>&lt;host&gt;</code>. This entry must be specified; there is no default.
74 *
75 * <LI>
 * <code>webport &lt;port&gt;</code>
 * <BR>The port number for the Job Scheduler's web interface is
 * <code>&lt;port&gt;</code>. If not specified, the default port number is 8080.
79 *
80 * <LI>
 * <code>schedulerhost &lt;host&gt;</code>
 * <BR>The host name to which the Job Scheduler listens for connections from job
 * frontend processes is <code>&lt;host&gt;</code>. If not specified, the default is
 * <code>"localhost"</code>.
85 *
86 * <LI>
 * <code>schedulerport &lt;port&gt;</code>
 * <BR>The port number to which the Job Scheduler listens for connections from
 * job frontend processes is <code>&lt;port&gt;</code>. If not specified, the
 * default port number is 20617.
91 *
92 * <LI>
 * <code>frontendhost &lt;host&gt;</code>
 * <BR>The host name to which job frontend processes listen for connections from
 * job backend processes is <code>&lt;host&gt;</code>. This entry must be specified;
 * there is no default.
97 *
98 * <LI>
 * <code>backend &lt;name&gt; &lt;cpus&gt; &lt;host&gt; &lt;jvm&gt;
 * &lt;classpath&gt; [&lt;jvmflag&gt; ...]</code>
 * <BR>The parallel computer includes a backend node named
 * <code>&lt;name&gt;</code> with <code>&lt;cpus&gt;</code> CPUs. The host name for SSH
 * remote logins to the backend node is <code>&lt;host&gt;</code>. The full pathname
 * for executing the Java Virtual Machine (JVM) on the backend node is
 * <code>&lt;jvm&gt;</code>. The Java class path for the Parallel Java Library on
 * the backend node is <code>&lt;classpath&gt;</code>. Each
 * <code>&lt;jvmflag&gt;</code> (zero or more) gives a flag passed to the JVM on the
 * command line. At least one such entry must be specified.
109 *
110 * <LI>
 * <code>backendshell &lt;name&gt; &lt;shell command&gt;</code>
 * <BR>On the backend node named <code>&lt;name&gt;</code>, use the given shell
 * command string when starting a job backend process. This entry, if present,
 * must appear after the corresponding <code>backend &lt;name&gt;</code> entry. If
 * this entry is omitted, the default shell command string is
 * <code>"bash -l -c"</code>.
117 *
118 * <LI>
 * <code>jobtime &lt;time&gt;</code>
120 * <BR>The maximum time in seconds any Parallel Java job is allowed to run. The
121 * Job Scheduler will abort a job if it runs for this many seconds. If not
122 * specified, the default is not to impose a maximum time on jobs. <I>Note:</I>
123 * If the Job Scheduler is configured with a maximum job time and a particular
124 * job is given a maximum time with the <code>-Dpj.jobtime</code> property, the
125 * smaller of the Job Scheduler's maximum job time and the job's maximum time
126 * will be used for that job.
127 * </UL>
128 * <P>
129 * Here is an example of a configuration file:
130 *
131 * <TABLE BORDER=1>
132 * <CAPTION>Parallel Java Job Scheduler configuration file</CAPTION>
133 * <TR>
134 * <TD style="vertical-align:top;">
135 * <PRE> # Parallel Java Job Scheduler configuration file
136 * # Frontend node: tardis.cs.rit.edu
137 * # Backend nodes: dr00-dr09
138 *
139 * cluster RIT CS Tardis Hybrid SMP Cluster
140 * logfile /var/tmp/parajava/scheduler.log
141 * webhost tardis.cs.rit.edu
142 * webport 8080
143 * schedulerhost localhost
144 * schedulerport 20617
145 * frontendhost 10.10.221.1
146 * backend dr00 4 10.10.221.10 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
147 * backend dr01 4 10.10.221.11 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
148 * backend dr02 4 10.10.221.12 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
149 * backend dr03 4 10.10.221.13 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
150 * backend dr04 4 10.10.221.14 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
151 * backend dr05 4 10.10.221.15 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
152 * backend dr06 4 10.10.221.16 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
153 * backend dr07 4 10.10.221.17 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
154 * backend dr08 4 10.10.221.18 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar
155 * backend dr09 4 10.10.221.19 /usr/local/versions/jdk-1.5.0_15/bin/java /var/tmp/parajava/pj.jar</PRE>
156 * </TD>
157 * </TR>
158 * </TABLE>
159 *
160 * @author Alan Kaminsky
161 * @version 20-Jun-2012
162 */
163 public class Configuration {
164
165 // Hidden data members.
166 // Cluster name.
167 private String myClusterName;
168
169 // Log file.
170 private String myLogFile;
171
172 // Web interface host and port.
173 private String myWebHost = Constants.ALL_NETWORK_INTERFACES;
174 private int myWebPort = Constants.WEB_PORT;
175
176 // Job Scheduler host and port.
177 private String mySchedulerHost = "localhost";
178 private int mySchedulerPort = Constants.PJ_PORT;
179
180 // Frontend host.
181 private String myFrontendHost;
182
183 // List of backend information objects.
184 private ArrayList<BackendInfo> myBackendInfo
185 = new ArrayList<BackendInfo>();
186
187 // Default shell comand string.
188 private static final String DEFAULT_SHELL_COMMAND = "bash -l -c";
189
190 // Maximum job time. 0 means no maximum.
191 private int myJobTime;
192
193 // Exported constructors.
194 /**
195 * Construct a new configuration. The configuration information is read from
196 * the given file.
197 *
198 * @param configfile Configuration file name.
199 * @exception IOException Thrown if an I/O error occurred while reading the
200 * configuration file. Thrown if there was an error in the configuration
201 * file.
202 * @throws java.io.IOException if any.
203 */
204 public Configuration(String configfile)
205 throws IOException {
206 parseConfigFile(configfile);
207 }
208
209 // Exported operations.
210 /**
211 * Returns the cluster name.
212 *
213 * @return Cluster name.
214 */
215 public String getClusterName() {
216 return myClusterName;
217 }
218
219 /**
220 * Returns the Job Scheduler's log file name.
221 *
222 * @return Log file name.
223 */
224 public String getLogFile() {
225 return myLogFile;
226 }
227
228 /**
229 * Returns the Job Scheduler's web interface host name.
230 *
231 * @return Host name.
232 */
233 public String getWebHost() {
234 return myWebHost;
235 }
236
237 /**
238 * Returns the Job Scheduler's web interface port number.
239 *
240 * @return Port number.
241 */
242 public int getWebPort() {
243 return myWebPort;
244 }
245
246 /**
247 * Returns the Job Scheduler's channel group host name. To send messages to
248 * the Job Scheduler, a job frontend connects a channel to this host.
249 *
250 * @return Host name.
251 */
252 public String getSchedulerHost() {
253 return mySchedulerHost;
254 }
255
256 /**
257 * Returns the Job Scheduler's channel group port number. To send messages
258 * to the Job Scheduler, a job frontend connects a channel to this port.
259 *
260 * @return Port number.
261 */
262 public int getSchedulerPort() {
263 return mySchedulerPort;
264 }
265
266 /**
267 * Returns the host name of the cluster's frontend processor.
268 *
269 * @return Host name.
270 */
271 public String getFrontendHost() {
272 return myFrontendHost;
273 }
274
275 /**
276 * Returns the number of backend processors.
277 *
278 * @return Count.
279 */
280 public int getBackendCount() {
281 return myBackendInfo.size();
282 }
283
284 /**
285 * Returns information about the given backend processor.
286 *
287 * @param i Index in the range 0 .. <code>getBackendCount()-1</code>.
288 * @return Backend information object.
289 */
290 public BackendInfo getBackendInfo(int i) {
291 return myBackendInfo.get(i);
292 }
293
294 /**
295 * Returns information about all backend processors.
296 *
297 * @return List of backend information objects.
298 */
299 public List<BackendInfo> getBackendInfoList() {
300 return myBackendInfo;
301 }
302
303 /**
304 * Returns the maximum job time.
305 *
306 * @return Maximum job time (seconds), or 0 if no maximum.
307 */
308 public int getJobTime() {
309 return myJobTime;
310 }
311
312 // Hidden operations.
313 /**
314 * Parse the configuration file.
315 *
316 * @param configfile Configuration file name.
317 *
318 * @exception IOException Thrown if an I/O error occurred.
319 */
320 private void parseConfigFile(String configfile)
321 throws IOException {
322 Scanner scanner = null;
323 String line = null;
324 long now = System.currentTimeMillis();
325 try {
326 scanner = new Scanner(new File(configfile));
327 lineloop:
328 while (scanner.hasNextLine()) {
329 line = scanner.nextLine();
330 Scanner linescanner = new Scanner(line);
331 if (!linescanner.hasNext()) {
332 continue lineloop;
333 }
334 String command = linescanner.next();
335 if (command.charAt(0) == '#') {
336 } else if (command.equals("cluster")) {
337 myClusterName = linescanner.nextLine().trim();
338 } else if (command.equals("logfile")) {
339 myLogFile = linescanner.next();
340 } else if (command.equals("webhost")) {
341 myWebHost = linescanner.next();
342 } else if (command.equals("webport")) {
343 myWebPort = Integer.parseInt(linescanner.next());
344 } else if (command.equals("schedulerhost")) {
345 mySchedulerHost = linescanner.next();
346 } else if (command.equals("schedulerport")) {
347 mySchedulerPort = Integer.parseInt(linescanner.next());
348 } else if (command.equals("frontendhost")) {
349 myFrontendHost = linescanner.next();
350 } else if (command.equals("backend")) {
351 String name = linescanner.next();
352 int cpus = linescanner.nextInt();
353 if (cpus < 1) {
354 throw new IOException("Invalid backend command, <cpus> must be >= 1: "
355 + line);
356 }
357 String host = linescanner.next();
358 String jvm = linescanner.next();
359 String classpath = linescanner.next();
360 ArrayList<String> jvmflags = new ArrayList<String>();
361 while (linescanner.hasNext()) {
362 jvmflags.add(linescanner.next());
363 }
364 BackendInfo backendinfo = new BackendInfo(name, cpus, BackendInfo.State.IDLE,
365 now, host, jvm, classpath, jvmflags.toArray(new String[0]), DEFAULT_SHELL_COMMAND);
366 myBackendInfo.add(backendinfo);
367 } else if (command.equals("backendshell")) {
368 String name = linescanner.next();
369 String shellCommand = linescanner.nextLine().trim();
370 BackendInfo backendinfo = backendInfoForName(name);
371 if (backendinfo == null) {
372 throw new IOException("Invalid backendshell command, no backend named \""
373 + name + "\"");
374 }
375 backendinfo.shellCommand = shellCommand;
376 } else if (command.equals("jobtime")) {
377 int time = linescanner.nextInt();
378 if (time < 1) {
379 throw new IOException("Invalid configuration command: " + line);
380 }
381 myJobTime = time;
382 } else {
383 throw new IOException("Invalid configuration command: " + line);
384 }
385 }
386 if (myClusterName == null) {
387 throw new IOException("Missing configuration command: cluster <name>");
388 }
389 if (myLogFile == null) {
390 throw new IOException("Missing configuration command: logfile <file>");
391 }
392 if (myWebHost == null) {
393 throw new IOException("Missing configuration command: webhost <host>");
394 }
395 if (myFrontendHost == null) {
396 throw new IOException("Missing configuration command: frontendhost <host>");
397 }
398 if (myBackendInfo.isEmpty()) {
399 throw new IOException("Missing configuration command: backend <name> <host> <port>");
400 }
401 } catch (NoSuchElementException exc) {
402 throw new IOException("Invalid configuration command: " + line);
403 } catch (NumberFormatException exc) {
404 throw new IOException("Invalid configuration command: " + line);
405 } finally {
406 if (scanner != null) {
407 scanner.close();
408 }
409 }
410 }
411
412 // Hidden operations.
413 /**
414 * Returns the backend info object for the given backend name.
415 *
416 * @param name Backend name.
417 *
418 * @return Backend info, or null if <code>name</code> does not exist.
419 */
420 private BackendInfo backendInfoForName(String name) {
421 for (BackendInfo backendinfo : myBackendInfo) {
422 if (backendinfo.name.equals(name)) {
423 return backendinfo;
424 }
425 }
426 return null;
427 }
428
429 }