View Javadoc
1   //******************************************************************************
2   //
3   // File:    JobScheduler.java
4   // Package: edu.rit.pj.cluster
5   // Unit:    Class edu.rit.pj.cluster.JobScheduler
6   //
7   // This Java source file is copyright (C) 2012 by Alan Kaminsky. All rights
8   // reserved. For further information, contact the author, Alan Kaminsky, at
9   // ark@cs.rit.edu.
10  //
11  // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
12  // software; you can redistribute it and/or modify it under the terms of the GNU
13  // General Public License as published by the Free Software Foundation; either
14  // version 3 of the License, or (at your option) any later version.
15  //
16  // PJ is distributed in the hope that it will be useful, but WITHOUT ANY
17  // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18  // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19  //
20  // Linking this library statically or dynamically with other modules is making a
21  // combined work based on this library. Thus, the terms and conditions of the GNU
22  // General Public License cover the whole combination.
23  //
24  // As a special exception, the copyright holders of this library give you
25  // permission to link this library with independent modules to produce an
26  // executable, regardless of the license terms of these independent modules, and
27  // to copy and distribute the resulting executable under terms of your choice,
28  // provided that you also meet, for each linked independent module, the terms
29  // and conditions of the license of that module. An independent module is a module
30  // which is not derived from or based on this library. If you modify this library,
31  // you may extend this exception to your version of the library, but you are not
32  // obligated to do so. If you do not wish to do so, delete this exception
33  // statement from your version.
34  //
35  // A copy of the GNU General Public License is provided in the file gpl.txt. You
36  // may also obtain a copy of the GNU General Public License on the World Wide
37  // Web at http://www.gnu.org/licenses/gpl.html.
38  //
39  //******************************************************************************
40  package edu.rit.pj.cluster;
41  
42  import java.io.FileOutputStream;
43  import java.io.IOException;
44  import java.io.PrintStream;
45  import java.io.PrintWriter;
46  import java.net.InetSocketAddress;
47  import java.util.Date;
48  import java.util.HashMap;
49  import java.util.Iterator;
50  import java.util.LinkedList;
51  import java.util.List;
52  import java.util.Map;
53  
54  import edu.rit.http.HttpRequest;
55  import edu.rit.http.HttpResponse;
56  import edu.rit.http.HttpServer;
57  import edu.rit.mp.Channel;
58  import edu.rit.mp.ChannelGroup;
59  import edu.rit.mp.ChannelGroupClosedException;
60  import edu.rit.mp.ConnectListener;
61  import edu.rit.mp.ObjectBuf;
62  import edu.rit.mp.Status;
63  import edu.rit.mp.buf.ObjectItemBuf;
64  import edu.rit.pj.Version;
65  import edu.rit.util.Logger;
66  import edu.rit.util.PrintStreamLogger;
67  import edu.rit.util.Timer;
68  import edu.rit.util.TimerTask;
69  import edu.rit.util.TimerThread;
70  
71  /**
72   * Class JobScheduler is the main program for the PJ Job Scheduler Daemon
73   * process for a parallel computer.
74   * <P>
75   * Run the Job Scheduler Daemon on the cluster's frontend processor by typing
76   * this command:
77   * <P>
78   * java edu.rit.pj.cluster.JobScheduler <I>configfile</I>
79   * <BR><I>configfile</I> = Configuration file name
80   * <P>
81   * For further information about the configuration file, see class {@linkplain
82   * Configuration}.
83   *
84   * @author Alan Kaminsky
85   * @version 20-Jun-2012
86   */
87  public class JobScheduler
88          implements JobSchedulerRef {
89  
90  // Hidden data members.
91      // Cluster name.
92      private String myClusterName;
93  
94      // Log file.
95      private Logger myLog;
96  
97      // Web interface host and port.
98      private String myWebHost;
99      private int myWebPort;
100 
101     // Job Scheduler host and port.
102     private String mySchedulerHost;
103     private int mySchedulerPort;
104 
105     // Job frontend host.
106     private String myFrontendHost;
107 
108     // Maximum job time, or 0 if no maximum.
109     private int myJobTime;
110 
111     // Mapping from backend processor name to backend info.
112     private Map<String, BackendInfo> myNameToBackendMap
113             = new HashMap<String, BackendInfo>();
114 
115     // Array of backend info records.
116     private BackendInfo[] myBackendInfo;
117     private int myBackendCount;
118 
119     // Next backend number to assign to a job.
120     private int myNextBackendNumber = 0;
121 
122     // Next job number.
123     private int myNextJobNumber = 1;
124 
125     // Mapping from job frontend to job info.
126     private Map<JobFrontendRef, JobInfo> myFrontendToJobMap
127             = new HashMap<JobFrontendRef, JobInfo>();
128 
129     // Queue of running jobs.
130     private List<JobInfo> myRunningJobList
131             = new LinkedList<JobInfo>();
132 
133     // Queue of waiting jobs.
134     private List<JobInfo> myWaitingJobList
135             = new LinkedList<JobInfo>();
136 
137     // Timer thread for lease renewals and expirations.
138     private TimerThread myLeaseTimerThread;
139 
140     // Channel group for communicating with job frontend processes.
141     private ChannelGroup myChannelGroup;
142 
143     // Server for web interface.
144     private HttpServer myHttpServer;
145 
146     // Total compute time (msec) of all jobs.
147     private long myTotalComputeTime;
148 
149     // Date and time when Job Scheduler started.
150     private long myStartDateTime;
151 
152 // Hidden constructors.
153     /**
154      * Construct a new Job Scheduler Daemon.
155      *
156      * @param configfile Configuration file name.
157      *
158      * @exception IOException Thrown if an I/O error occurred.
159      */
160     private JobScheduler(String configfile)
161             throws IOException {
162         long now = System.currentTimeMillis();
163         myStartDateTime = now;
164 
165         // Parse configuration file.
166         Configuration config = new Configuration(configfile);
167         myClusterName = config.getClusterName();
168         myLog
169                 = new PrintStreamLogger(new PrintStream(new FileOutputStream(config.getLogFile(), true),
170                                 true));
171         myWebHost = config.getWebHost();
172         myWebPort = config.getWebPort();
173         mySchedulerHost = config.getSchedulerHost();
174         mySchedulerPort = config.getSchedulerPort();
175         myFrontendHost = config.getFrontendHost();
176         myJobTime = config.getJobTime();
177         myBackendCount = config.getBackendCount();
178         myBackendInfo = new BackendInfo[myBackendCount];
179         for (int i = 0; i < myBackendCount; ++i) {
180             BackendInfo backendinfo = config.getBackendInfo(i);
181             myNameToBackendMap.put(backendinfo.name, backendinfo);
182             myBackendInfo[i] = backendinfo;
183         }
184 
185         // Log startup.
186         myLog.log(now, "Started " + Version.PJ_VERSION);
187 
188         // Set up shutdown hook.
189         Runtime.getRuntime().addShutdownHook(new Thread() {
190             public void run() {
191                 shutdown();
192             }
193         });
194 
195         // Set up lease timer thread.
196         myLeaseTimerThread = new TimerThread();
197         myLeaseTimerThread.setDaemon(true);
198         myLeaseTimerThread.start();
199 
200         // Set up channel group.
201         myChannelGroup
202                 = new ChannelGroup(new InetSocketAddress(mySchedulerHost, mySchedulerPort),
203                         myLog);
204         myLog.log(now, "Job Scheduler at " + myChannelGroup.listenAddress());
205         myChannelGroup.setConnectListener(new ConnectListener() {
206             public void nearEndConnected(ChannelGroup theChannelGroup,
207                     Channel theChannel) {
208             }
209 
210             public void farEndConnected(ChannelGroup theChannelGroup,
211                     Channel theChannel) {
212                 createJob(theChannel);
213             }
214         });
215 
216         // Set up server for web interface.
217         myHttpServer
218                 = new HttpServer(new InetSocketAddress(myWebHost, myWebPort), myLog) {
219                     protected void process(HttpRequest request,
220                             HttpResponse response)
221                     throws IOException {
222                         processHttpRequest(request, response);
223                     }
224                 };
225         myLog.log(now, "Web interface at " + myHttpServer.getAddress());
226 
227         // Log backend nodes.
228         for (BackendInfo backend : myBackendInfo) {
229             myLog.log(now,
230                     "Backend " + backend.name + " at " + backend.host
231                     + ", " + backend.totalCpus
232                     + " CPU" + (backend.totalCpus == 1 ? "" : "s"));
233         }
234 
235         // Start accepting jobs.
236         myChannelGroup.startListening();
237     }
238 
239 // Hidden operations.
240     /**
241      * Create a job associated with the given channel.
242      *
243      * @param theChannel Channel for talking to Job Frontend process.
244      */
245     private synchronized void createJob(Channel theChannel) {
246         // Create Job Frontend proxy object for the channel.
247         JobFrontendRef frontend
248                 = new JobFrontendProxy(myChannelGroup, theChannel);
249         theChannel.info(frontend);
250 
251         // Create job information record.
252         JobInfo jobinfo = getJobInfo(frontend);
253 
254         // Start lease timers.
255         jobinfo.renewTimer.start(Constants.LEASE_RENEW_INTERVAL,
256                 Constants.LEASE_RENEW_INTERVAL);
257         jobinfo.expireTimer.start(Constants.LEASE_EXPIRE_INTERVAL);
258     }
259 
260     /**
261      * Run this Job Scheduler.
262      */
263     private void run() {
264         ObjectItemBuf<JobSchedulerMessage> buf
265                 = ObjectBuf.buffer((JobSchedulerMessage) null);
266         Status status = null;
267         JobSchedulerMessage message = null;
268         JobFrontendRef frontend = null;
269 
270         receiveloop:
271         for (;;) {
272             // Receive a message from any channel.
273             try {
274                 status = myChannelGroup.receive(null, null, buf);
275             } catch (ChannelGroupClosedException exc) {
276                 // Normal termination.
277                 break receiveloop;
278             } catch (Throwable exc) {
279                 myLog.log("Exception while receiving message", exc);
280                 break receiveloop;
281             }
282             message = buf.item;
283 
284             // Get job frontend proxy associated with channel.
285             frontend = (JobFrontendRef) status.channel.info();
286 
287             // Process message.
288             try {
289                 message.invoke(this, frontend);
290             } catch (Throwable exc) {
291                 myLog.log("Exception while processing message", exc);
292             }
293 
294             // Enable garbage collection of no-longer-needed objects while
295             // waiting to receive next message.
296             buf.item = null;
297             status = null;
298             message = null;
299             frontend = null;
300         }
301     }
302 
303 // Exported operations.
304     /**
305      * {@inheritDoc}
306      *
307      * Report that a backend node failed.
308      * @exception IOException Thrown if an I/O error occurred.
309      */
310     public synchronized void backendFailed(JobFrontendRef theJobFrontend,
311             String name)
312             throws IOException {
313         BackendInfo backendinfo = myNameToBackendMap.get(name);
314         if (backendinfo != null) {
315             long now = System.currentTimeMillis();
316             myLog.log(now, "Backend " + name + " failed");
317 //			if (backendinfo.state != BackendInfo.State.FAILED)
318 //				{
319 //				/*TBD*/ Cancel any reserved or running job
320 //				backendinfo.state = BackendInfo.State.FAILED;
321 //				backendinfo.stateTime = now;
322 //				backendinfo.job = null;
323 //				assignResourcesToJobs (now);
324 //				}
325         }
326     }
327 
328     /**
329      * {@inheritDoc}
330      *
331      * Cancel a job.
332      * @exception IOException Thrown if an I/O error occurred.
333      */
334     public synchronized void cancelJob(JobFrontendRef theJobFrontend,
335             String errmsg)
336             throws IOException {
337         JobInfo jobinfo = getJobInfo(theJobFrontend);
338         doCancelJob(System.currentTimeMillis(), jobinfo, errmsg);
339     }
340 
341     /**
342      * {@inheritDoc}
343      *
344      * Report that a job finished.
345      * @exception IOException Thrown if an I/O error occurred.
346      */
347     public synchronized void jobFinished(JobFrontendRef theJobFrontend)
348             throws IOException {
349         JobInfo jobinfo = getJobInfo(theJobFrontend);
350         doFinishJob(System.currentTimeMillis(), jobinfo);
351     }
352 
353     /**
354      * {@inheritDoc}
355      *
356      * Renew the lease on a job.
357      * @exception IOException Thrown if an I/O error occurred.
358      */
359     public synchronized void renewLease(JobFrontendRef theJobFrontend)
360             throws IOException {
361         JobInfo jobinfo = getJobInfo(theJobFrontend);
362         jobinfo.expireTimer.start(Constants.LEASE_EXPIRE_INTERVAL);
363     }
364 
365     /**
366      * {@inheritDoc}
367      *
368      * Report a comment for a process.
369      */
370     public synchronized void reportComment(JobFrontendRef theJobFrontend,
371             int rank,
372             String comment) {
373         JobInfo jobinfo = getJobInfo(theJobFrontend);
374         jobinfo.comment[rank] = comment;
375     }
376 
377     /**
378      * {@inheritDoc}
379      *
380      * Request that a job be scheduled.
381      * @exception IOException Thrown if an I/O error occurred.
382      */
383     public synchronized void requestJob(JobFrontendRef theJobFrontend,
384             String username,
385             int Nn,
386             int Np,
387             int Nt)
388             throws IOException {
389         JobInfo jobinfo = getJobInfo(theJobFrontend);
390         long now = System.currentTimeMillis();
391         myLog.log(now,
392                 "Job " + jobinfo.jobnum + " queued, username=" + username
393                 + ", nn=" + Nn + ", np=" + Np + ", nt=" + Nt);
394 
395         // Record job parameters.
396         jobinfo.username = username;
397         jobinfo.Nn = Math.min(Nn, Np);
398         jobinfo.Np = Np;
399         jobinfo.Nt = Nt;
400         jobinfo.backend = new BackendInfo[Np];
401         jobinfo.cpus = new int[Np];
402         jobinfo.comment = new String[Np];
403         for (int i = 0; i < Np; ++i) {
404             jobinfo.comment[i] = "";
405         }
406 
407         // If the cluster doesn't have enough resources, cancel the job.
408         if (!enoughResourcesForJob(jobinfo.Nn, jobinfo.Np, jobinfo.Nt)) {
409             doCancelJobTooFewResources(now, jobinfo);
410             return;
411         }
412 
413         // Add job to queue of waiting jobs.
414         myWaitingJobList.add(jobinfo);
415 
416         // Inform job frontend of job number.
417         theJobFrontend.assignJobNumber(this, jobinfo.jobnum, myFrontendHost);
418 
419         // Assign idle nodes to waiting jobs.
420         assignResourcesToJobs(now);
421     }
422 
423     /**
424      * Close communication with this Job Scheduler.
425      */
426     public void close() {
427     }
428 
429 // More hidden operations.
430     /**
431      * Take action when a job's lease renewal timer times out.
432      *
433      * @param theJobFrontend Job frontend that is calling this method.
434      *
435      * @exception IOException Thrown if an I/O error occurred.
436      */
437     private synchronized void renewTimeout(Timer theTimer,
438             JobFrontendRef theJobFrontend)
439             throws IOException {
440         if (theTimer.isTriggered()) {
441             theJobFrontend.renewLease(this);
442         }
443     }
444 
445     /**
446      * Take action when a job's lease expiration timer times out.
447      *
448      * @param theJobFrontend Job frontend that is calling this method.
449      *
450      * @exception IOException Thrown if an I/O error occurred.
451      */
452     private synchronized void expireTimeout(Timer theTimer,
453             JobFrontendRef theJobFrontend)
454             throws IOException {
455         if (theTimer.isTriggered()) {
456             JobInfo jobinfo = getJobInfo(theJobFrontend);
457             doCancelJob(System.currentTimeMillis(),
458                     jobinfo,
459                     "Job frontend lease expired");
460         }
461     }
462 
463     /**
464      * Take action when a job's maximum job time timer times out.
465      *
466      * @param theJobFrontend Job frontend that is calling this method.
467      *
468      * @exception IOException Thrown if an I/O error occurred.
469      */
470     private synchronized void jobTimeout(Timer theTimer,
471             JobFrontendRef theJobFrontend)
472             throws IOException {
473         if (theTimer.isTriggered()) {
474             JobInfo jobinfo = getJobInfo(theJobFrontend);
475             String errmsg
476                     = "Maximum job time (" + myJobTime + " seconds) exceeded";
477             jobinfo.frontend.cancelJob(this, errmsg);
478             doCancelJob(System.currentTimeMillis(), jobinfo, errmsg);
479         }
480     }
481 
482     /**
483      * Get the job info record associated with the given job frontend.
484      *
485      * @param frontend Job frontend.
486      *
487      * @return Job info record.
488      */
489     private JobInfo getJobInfo(JobFrontendRef frontend) {
490         final JobFrontendRef fe = frontend;
491         JobInfo jobinfo = myFrontendToJobMap.get(frontend);
492         if (jobinfo == null) {
493             jobinfo = new JobInfo(/*jobnum   */myNextJobNumber++,
494                     /*state    */ JobInfo.State.WAITING,
495                     /*stateTime*/ System.currentTimeMillis(),
496                     /*username */ null,
497                     /*Nn       */ 0,
498                     /*Np       */ 0,
499                     /*Nt       */ 0,
500                     /*count    */ 0,
501                     /*backend  */ null,
502                     /*cpus     */ null,
503                     /*nodeCount*/ 0,
504                     /*frontend */ fe,
505                     /*renewTimer*/
506                     myLeaseTimerThread.createTimer(new TimerTask() {
507                         public void action(Timer theTimer) {
508                             try {
509                                 renewTimeout(theTimer, fe);
510                             } catch (Throwable exc) {
511                                 myLog.log(exc);
512                             }
513                         }
514                     }),
515                     /*expireTimer*/
516                     myLeaseTimerThread.createTimer(new TimerTask() {
517                         public void action(Timer theTimer) {
518                             try {
519                                 expireTimeout(theTimer, fe);
520                             } catch (Throwable exc) {
521                                 myLog.log(exc);
522                             }
523                         }
524                     }),
525                     /*jobTimer*/
526                     myLeaseTimerThread.createTimer(new TimerTask() {
527                         public void action(Timer theTimer) {
528                             try {
529                                 jobTimeout(theTimer, fe);
530                             } catch (Throwable exc) {
531                                 myLog.log(exc);
532                             }
533                         }
534                     }));
535             myFrontendToJobMap.put(frontend, jobinfo);
536         }
537         return jobinfo;
538     }
539 
540     /**
541      * Finish the given job.
542      *
543      * @param now Current time.
544      * @param jobinfo Job info record.
545      *
546      * @exception IOException Thrown if an I/O error occurred.
547      */
548     private void doFinishJob(long now,
549             JobInfo jobinfo)
550             throws IOException {
551         myLog.log(now, "Job " + jobinfo.jobnum + " finished");
552         doCleanupJob(now, jobinfo);
553     }
554 
555     /**
556      * Cancel the given job.
557      *
558      * @param now Current time.
559      * @param jobinfo Job info record.
560      * @param errmsg Error message.
561      *
562      * @exception IOException Thrown if an I/O error occurred.
563      */
564     private void doCancelJob(long now,
565             JobInfo jobinfo,
566             String errmsg)
567             throws IOException {
568         myLog.log(now, "Job " + jobinfo.jobnum + " canceled: " + errmsg);
569         doCleanupJob(now, jobinfo);
570     }
571 
572     /**
573      * Cancel the given job because of too few resources.
574      *
575      * @param now Current time.
576      * @param jobinfo Job info record.
577      *
578      * @exception IOException Thrown if an I/O error occurred.
579      */
580     private void doCancelJobTooFewResources(long now,
581             JobInfo jobinfo)
582             throws IOException {
583         String errmsg;
584         if (jobinfo.Nt == 0) {
585             errmsg
586                     = "Too few resources available to assign "
587                     + jobinfo.Nn + " node" + (jobinfo.Nn == 1 ? "" : "s") + " and "
588                     + jobinfo.Np + " process" + (jobinfo.Np == 1 ? "" : "es");
589         } else {
590             errmsg
591                     = "Too few resources available to assign "
592                     + jobinfo.Nn + " node" + (jobinfo.Nn == 1 ? "" : "s") + ", "
593                     + jobinfo.Np + " process" + (jobinfo.Np == 1 ? "" : "es") + ", and "
594                     + jobinfo.Nt + " CPU" + (jobinfo.Nt == 1 ? "" : "s") + " per process";
595         }
596         jobinfo.frontend.cancelJob(this, errmsg);
597         doCancelJob(now, jobinfo, errmsg);
598     }
599 
600     /**
601      * Clean up the given job.
602      *
603      * @param now Current time.
604      * @param jobinfo Job info record.
605      *
606      * @exception IOException Thrown if an I/O error occurred.
607      */
608     private void doCleanupJob(long now,
609             JobInfo jobinfo)
610             throws IOException {
611         // Stop lease timers.
612         jobinfo.renewTimer.stop();
613         jobinfo.expireTimer.stop();
614         jobinfo.jobTimer.stop();
615 
616         // Stop communication with job frontend.
617         jobinfo.frontend.close();
618 
619         // Remove job from queues.
620         myFrontendToJobMap.remove(jobinfo.frontend);
621         myRunningJobList.remove(jobinfo);
622         myWaitingJobList.remove(jobinfo);
623 
624         // Make each of the job's nodes idle (but not failed nodes).
625         for (int i = 0; i < jobinfo.count; ++i) {
626             BackendInfo backendinfo = jobinfo.backend[i];
627             if (backendinfo.state != BackendInfo.State.FAILED) {
628                 backendinfo.state = BackendInfo.State.IDLE;
629                 backendinfo.stateTime = now;
630                 backendinfo.job = null;
631             }
632         }
633 
634         // Update total compute time.
635         myTotalComputeTime += (now - jobinfo.stateTime);
636 
637         // Assign idle nodes to waiting jobs.
638         assignResourcesToJobs(now);
639     }
640 
641     /**
642      * Assign idle nodes to waiting jobs.
643      *
644      * @param now Current time.
645      *
646      * @exception IOException Thrown if an I/O error occurred.
647      */
648     private void assignResourcesToJobs(long now)
649             throws IOException {
650         // List of jobs to be canceled.
651         List<JobInfo> cancelList = new LinkedList<JobInfo>();
652 
653         // Decide what to do with each waiting job.
654         Iterator<JobInfo> iter = myWaitingJobList.iterator();
655         jobLoop:
656         while (iter.hasNext()) {
657             JobInfo jobinfo = iter.next();
658 
659             // If the cluster doesn't have enough resources, don't try to
660             // reserve any.
661             if (!enoughResourcesForJob(jobinfo.Nn, jobinfo.Np, jobinfo.Nt)) {
662                 iter.remove();
663                 cancelList.add(jobinfo);
664                 continue jobLoop;
665             }
666 
667             // Used to decide how many processes for each node.
668             int Np_div_Nn = jobinfo.Np / jobinfo.Nn;
669             int Np_rem_Nn = jobinfo.Np % jobinfo.Nn;
670 
671             // Reserve idle nodes for this job until there are no more idle
672             // nodes or this job has all the nodes it needs.
673             int be = myNextBackendNumber;
674             do {
675                 // Decide how many processes for this node.
676                 int Nproc = Np_div_Nn;
677                 if (jobinfo.nodeCount < Np_rem_Nn) {
678                     ++Nproc;
679                 }
680 
681                 // Reserve this node only if it is idle and it has enough CPUs.
682                 BackendInfo backendinfo = myBackendInfo[be];
683                 if (backendinfo.state == BackendInfo.State.IDLE
684                         && backendinfo.totalCpus >= Nproc) {
685                     // Reserve node.
686                     backendinfo.state = BackendInfo.State.RESERVED;
687                     backendinfo.stateTime = now;
688                     backendinfo.job = jobinfo;
689 
690                     // Used to decide how many CPUs for each process.
691                     int Nt_div_Nproc = backendinfo.totalCpus / Nproc;
692                     int Nt_rem_Nproc = backendinfo.totalCpus % Nproc;
693 
694                     // Assign Np processes.
695                     for (int i = 0; i < Nproc; ++i) {
696                         // Decide how many CPUs for this process.
697                         int Ncpus = jobinfo.Nt;
698                         if (Ncpus == 0) {
699                             Ncpus = Nt_div_Nproc;
700                             if (i < Nt_rem_Nproc) {
701                                 ++Ncpus;
702                             }
703                         }
704 
705                         // Log information.
706                         myLog.log(now,
707                                 "Job " + jobinfo.jobnum + " assigned "
708                                 + backendinfo.name + ", rank=" + jobinfo.count
709                                 + ", CPUs=" + Ncpus);
710 
711                         // Record information about process.
712                         jobinfo.backend[jobinfo.count] = backendinfo;
713                         jobinfo.cpus[jobinfo.count] = Ncpus;
714                         ++jobinfo.count;
715 
716                         // Inform Job Frontend.
717                         jobinfo.frontend.assignBackend(/*theJobScheduler*/this,
718                                 /*name           */ backendinfo.name,
719                                 /*host           */ backendinfo.host,
720                                 /*jvm            */ backendinfo.jvm,
721                                 /*classpath      */ backendinfo.classpath,
722                                 /*jvmflags       */ backendinfo.jvmflags,
723                                 /*shellCommand   */ backendinfo.shellCommand,
724                                 /*Nt             */ Ncpus);
725                     }
726 
727                     // Assign one node.
728                     ++jobinfo.nodeCount;
729                 }
730 
731                 // Consider next node.
732                 be = (be + 1) % myBackendCount;
733             } while (be != myNextBackendNumber && jobinfo.count < jobinfo.Np);
734             myNextBackendNumber = be;
735 
736             // If this job now has Np processes, start running this job.
737             if (jobinfo.count == jobinfo.Np) {
738                 // Log information.
739                 myLog.log(now, "Job " + jobinfo.jobnum + " started");
740 
741                 // Mark job as running.
742                 iter.remove();
743                 myRunningJobList.add(jobinfo);
744                 jobinfo.state = JobInfo.State.RUNNING;
745                 jobinfo.stateTime = now;
746 
747                 // Mark all the job's nodes as running.
748                 for (BackendInfo backendinfo : jobinfo.backend) {
749                     backendinfo.state = BackendInfo.State.RUNNING;
750                     backendinfo.stateTime = now;
751                 }
752 
753                 // If the Job Scheduler is imposing a maximum job time, start
754                 // job timer.
755                 if (myJobTime > 0) {
756                     jobinfo.jobTimer.start(myJobTime * 1000L);
757                 }
758             } // If this job does not yet have Np processes, don't schedule any
759             // further jobs.
760             else {
761                 break jobLoop;
762             }
763         }
764 
765         // Cancel jobs for which there are insufficient resources.
766         for (JobInfo jobinfo : cancelList) {
767             doCancelJobTooFewResources(now, jobinfo);
768         }
769     }
770 
771     /**
772      * Determine if there are enough resources to run a job.
773      *
774      * @param Nn Number of backend nodes required.
775      * @param Np Number of processes required.
776      * @param Nt Number of CPUs per process required. 0 means "all CPUs."
777      *
778      * @return True if there are enough resources, false if not.
779      */
780     private boolean enoughResourcesForJob(int Nn,
781             int Np,
782             int Nt) {
783         // Determine worst-case processes per node.
784         int Ppn = (Np + Nn - 1) / Nn;
785 
786         // If number of CPUs per process is "all CPUs," assume one CPU per
787         // process.
788         if (Nt == 0) {
789             Nt = 1;
790         }
791 
792         // Count how many nodes meet the requirements.
793         int nodeCount = 0;
794         for (BackendInfo backendinfo : myBackendInfo) {
795             // The node must not have failed.
796             if (backendinfo.state != BackendInfo.State.FAILED
797                     && // The node must have at least Ppn*Nt CPUs.
798                     backendinfo.totalCpus >= Ppn * Nt) {
799                 // The node meets the requirements.
800                 ++nodeCount;
801             }
802         }
803 
804         // Return outcome.
805         return nodeCount >= Nn;
806     }
807 
808     /**
809      * Process the given HTTP request.
810      *
811      * @param request HTTP request.
812      * @param response HTTP response.
813      *
814      * @exception IOException Thrown if an I/O error occurred.
815      */
816     private void processHttpRequest(HttpRequest request,
817             HttpResponse response)
818             throws IOException {
819         long now = System.currentTimeMillis();
820 
821         // Reject an invalid HTTP request.
822         if (!request.isValid()) {
823             response.setStatusCode(HttpResponse.Status.STATUS_400_BAD_REQUEST);
824             PrintWriter out = response.getPrintWriter();
825             printStatusHtmlStart(out, now);
826             out.println("<P>");
827             out.println("400 Bad Request");
828             printStatusHtmlEnd(out);
829         } // Reject all methods except GET.
830         else if (!request.getMethod().equals(HttpRequest.GET_METHOD)) {
831             response.setStatusCode(HttpResponse.Status.STATUS_501_NOT_IMPLEMENTED);
832             PrintWriter out = response.getPrintWriter();
833             printStatusHtmlStart(out, now);
834             out.println("<P>");
835             out.println("501 Not Implemented");
836             printStatusHtmlEnd(out);
837         } // Print the status document.
838         else if (request.getUri().equals("/")
839                 || request.getUri().equals("/?")) {
840             PrintWriter out = response.getPrintWriter();
841             printStatusHtmlStart(out, now);
842             printStatusHtmlBody(out, now);
843             printStatusHtmlEnd(out);
844         } // Print the debug document.
845         else if (request.getUri().equals("/debug")) {
846             PrintWriter out = response.getPrintWriter();
847             printDebugHtmlStart(out, now);
848             printDebugHtmlBody(out);
849             printStatusHtmlEnd(out);
850         } // Print the detailed job status document.
851         else if (request.getUri().startsWith("/job/")) {
852             String jobString = request.getUri().substring(5);
853             try {
854                 int jobNum = Integer.parseInt(jobString);
855                 PrintWriter out = response.getPrintWriter();
856                 printJobDetailHtmlStart(out, now, jobNum);
857                 printJobDetailHtmlBody(out, now, jobNum);
858                 printStatusHtmlEnd(out);
859             } catch (NumberFormatException exc) {
860                 PrintWriter out = response.getPrintWriter();
861                 printErrorHtmlStart(out);
862                 out.printf("<P>Invalid job number \"%s\"</P>\n", jobString);
863                 printErrorHtmlEnd(out);
864             }
865         } // Reject all other URIs.
866         else {
867             response.setStatusCode(HttpResponse.Status.STATUS_404_NOT_FOUND);
868             PrintWriter out = response.getPrintWriter();
869             printErrorHtmlStart(out);
870             out.println("<P>404 Not Found</P>");
871             printErrorHtmlEnd(out);
872         }
873 
874         // Send the response.
875         response.close();
876     }
877 
878     /**
879      * Print the start of the status HTML document on the given print writer.
880      *
881      * @param out Print writer.
882      * @param now Current time.
883      */
884     private void printStatusHtmlStart(PrintWriter out,
885             long now) {
886         out.println("<HTML>");
887         out.println("<HEAD>");
888         out.print("<TITLE>");
889         out.print(myClusterName);
890         out.println("</TITLE>");
891         out.print("<META HTTP-EQUIV=\"refresh\" CONTENT=\"20;url=");
892         printWebInterfaceURL(out);
893         out.println("\">");
894         out.println("<STYLE TYPE=\"text/css\">");
895         out.println("<!--");
896         out.println("* {font-family: Arial, Helvetica, Sans-Serif;}");
897         out.println("body {font-size: small;}");
898         out.println("h1 {font-size: 140%; font-weight: bold;}");
899         out.println("table {font-size: 100%;}");
900         out.println("-->");
901         out.println("</STYLE>");
902         out.println("</HEAD>");
903         out.println("<BODY>");
904         out.print("<H1>");
905         out.print(myClusterName);
906         out.println("</H1>");
907         out.println("<P>");
908         out.print("<FORM ACTION=\"");
909         printWebInterfaceURL(out);
910         out.println("\" METHOD=\"get\">");
911         out.println("<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0>");
912         out.println("<TR>");
913         out.print("<TD ALIGN=\"left\" VALIGN=\"center\">");
914         out.print("<INPUT TYPE=\"submit\" VALUE=\"Refresh\">");
915         out.println("</TD>");
916         out.println("<TD WIDTH=20> </TD>");
917         out.print("<TD ALIGN=\"left\" VALIGN=\"center\">");
918         out.print(new Date(now));
919         out.print(" -- ");
920         out.print(Version.PJ_VERSION);
921         out.println("</TD>");
922         out.println("</TR>");
923         out.println("</TABLE>");
924         out.println("</FORM>");
925     }
926 
927     /**
928      * Print the body of the status HTML document on the given print writer.
929      *
930      * @param out Print writer.
931      * @param now Current time.
932      */
933     private synchronized void printStatusHtmlBody(PrintWriter out,
934             long now) {
935         out.println("<P>");
936         out.println("<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0>");
937         out.println("<TR>");
938         out.println("<TD ALIGN=\"center\" VALIGN=\"top\">");
939 
940         out.println("Nodes");
941         out.println("<TABLE BORDER=1 CELLPADDING=3 CELLSPACING=0>");
942         out.println("<TR>");
943         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
944 
945         out.println("<TABLE BORDER=0 CELLPADDING=3 CELLSPACING=0>");
946         printBackendLabels(out);
947         int i = 0;
948         for (BackendInfo backend : myBackendInfo) {
949             printBackendInfo(out, now, backend, i);
950             ++i;
951         }
952         out.println("</TABLE>");
953 
954         out.println("</TD>");
955         out.println("</TR>");
956         out.println("</TABLE>");
957 
958         out.println("</TD>");
959         out.println("<TD WIDTH=40> </TD>");
960         out.println("<TD ALIGN=\"center\" VALIGN=\"top\">");
961 
962         out.println("Jobs");
963         out.println("<TABLE BORDER=1 CELLPADDING=3 CELLSPACING=0>");
964         out.println("<TR>");
965         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
966 
967         out.println("<TABLE BORDER=0 CELLPADDING=3 CELLSPACING=0>");
968         printJobLabels(out);
969         i = 0;
970         for (JobInfo job : myRunningJobList) {
971             printJobInfo(out, now, job, i);
972             ++i;
973         }
974         for (JobInfo job : myWaitingJobList) {
975             printJobInfo(out, now, job, i);
976             ++i;
977         }
978         out.println("</TABLE>");
979 
980         out.println("</TD>");
981         out.println("</TR>");
982         out.println("</TABLE>");
983 
984         printTotalComputeTime(out);
985         out.print("<BR>");
986         printJobCount(out);
987         out.println("<BR>Since " + new Date(myStartDateTime));
988 
989         out.println("</TD>");
990         out.println("</TR>");
991         out.println("</TABLE>");
992     }
993 
994     /**
995      * Print the job count.
996      *
997      * @param out Print writer.
998      */
999     private void printJobCount(PrintWriter out) {
1000         if (myNextJobNumber == 2) {
1001             out.print("1 job");
1002         } else {
1003             out.print(myNextJobNumber - 1);
1004             out.print(" jobs");
1005         }
1006         out.println(" served");
1007     }
1008 
1009     /**
1010      * Print the total CPU time.
1011      *
1012      * @param out Print writer.
1013      */
1014     private void printTotalComputeTime(PrintWriter out) {
1015         if (myTotalComputeTime < 1000000L) {
1016             out.print(myTotalComputeTime / 1000L);
1017         } else if (myTotalComputeTime < 1000000000L) {
1018             out.print("Over ");
1019             out.print(myTotalComputeTime / 1000000L);
1020             out.print(" thousand");
1021         } else if (myTotalComputeTime < 1000000000000L) {
1022             out.print("Over ");
1023             out.print(myTotalComputeTime / 1000000000L);
1024             out.print(" million");
1025         } else if (myTotalComputeTime < 1000000000000000L) {
1026             out.print("Over ");
1027             out.print(myTotalComputeTime / 1000000000000L);
1028             out.print(" billion");
1029         } else {
1030             out.print("Over ");
1031             out.print(myTotalComputeTime / 1000000000000000L);
1032             out.print(" trillion");
1033         }
1034         out.println(" CPU seconds served");
1035     }
1036 
1037     /**
1038      * Print the end of the status HTML document on the given print writer.
1039      *
1040      * @param out Print writer.
1041      */
1042     private void printStatusHtmlEnd(PrintWriter out) {
1043         out.println("<P>");
1044         out.println("<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0>");
1045         out.println("<TR>");
1046         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1047         out.println("Job queue web interface:&nbsp;&nbsp;");
1048         out.println("</TD>");
1049         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1050         out.print("<A HREF=\"");
1051         printWebInterfaceURL(out);
1052         out.print("\">");
1053         printWebInterfaceURL(out);
1054         out.println("</A>");
1055         out.println("</TD>");
1056         out.println("</TR>");
1057         out.println("<TR>");
1058         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1059         out.println("Powered by Parallel Java:&nbsp;&nbsp;");
1060         out.println("</TD>");
1061         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1062         out.println("<A HREF=\"http://www.cs.rit.edu/~ark/pj.shtml\">http://www.cs.rit.edu/~ark/pj.shtml</A>");
1063         out.println("</TD>");
1064         out.println("</TR>");
1065         out.println("<TR>");
1066         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1067         out.println("Developed by Alan Kaminsky:&nbsp;&nbsp;");
1068         out.println("</TD>");
1069         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1070         out.println("<A HREF=\"http://www.cs.rit.edu/~ark/\">http://www.cs.rit.edu/~ark/</A>");
1071         out.println("</TD>");
1072         out.println("</TR>");
1073         out.println("</TABLE>");
1074         out.println("</BODY>");
1075         out.println("</HTML>");
1076     }
1077 
1078     /**
1079      * Print the web interface URL on the given print writer.
1080      *
1081      * @param out Print writer.
1082      */
1083     private void printWebInterfaceURL(PrintWriter out) {
1084         out.printf("http://%s:%d/", myWebHost, myWebPort);
1085     }
1086 
1087     /**
1088      * Print the URL for the given job number on the given print writer.
1089      *
1090      * @param out Print writer.
1091      * @param jobNum Job number.
1092      */
1093     private void printJobNumberURL(PrintWriter out,
1094             int jobNum) {
1095         out.printf("http://%s:%d/job/%d", myWebHost, myWebPort, jobNum);
1096     }
1097 
1098     /**
1099      * Print a link for the given job number on the given print writer.
1100      *
1101      * @param out Print writer.
1102      * @param jobNum Job number.
1103      */
1104     private void printJobNumberLink(PrintWriter out,
1105             int jobNum) {
1106         out.printf("<A HREF=\"http://%s:%d/job/%d\">&nbsp;%d&nbsp;</A>",
1107                 myWebHost, myWebPort, jobNum, jobNum);
1108     }
1109 
1110     /**
1111      * Print the difference between the given times on the given print writer.
1112      *
1113      * @param out Print writer.
1114      * @param now Time now.
1115      * @param then Time then.
1116      */
1117     private void printDeltaTime(PrintWriter out,
1118             long now,
1119             long then) {
1120         out.print((now - then + 500L) / 1000L);
1121         out.print(" sec");
1122     }
1123 
1124     /**
1125      * Print the backend labels on the given print writer.
1126      *
1127      * @param out Print writer.
1128      */
1129     private void printBackendLabels(PrintWriter out) {
1130         out.println("<TR BGCOLOR=\"#E8E8E8\">");
1131         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1132         out.print("<I>Node</I>");
1133         out.println("</TD>");
1134         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1135         out.print("<I>CPUs</I>");
1136         out.println("</TD>");
1137         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1138         out.print("<I>Status</I>");
1139         out.println("</TD>");
1140         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1141         out.print("<I>Job</I>");
1142         out.println("</TD>");
1143         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1144         out.print("<I>Time</I>");
1145         out.println("</TD>");
1146         out.println("</TR>");
1147     }
1148 
1149     /**
1150      * Print the given backend info on the given print writer.
1151      *
1152      * @param out Print writer.
1153      * @param now Current time.
1154      * @param backend Backend info.
1155      * @param i Even = white background, odd = gray background.
1156      */
1157     private void printBackendInfo(PrintWriter out,
1158             long now,
1159             BackendInfo backend,
1160             int i) {
1161         out.print("<TR BGCOLOR=\"#");
1162         out.print(i % 2 == 0 ? "FFFFFF" : "E8E8E8");
1163         out.println("\">");
1164         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1165         out.print(backend.name);
1166         out.println("</TD>");
1167         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1168         out.print(backend.totalCpus);
1169         out.println("</TD>");
1170         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1171         if (backend.state == BackendInfo.State.FAILED) {
1172             out.print("<FONT COLOR=\"#FF0000\"><B>");
1173             out.print(backend.state);
1174             out.print("</B></FONT>");
1175         } else {
1176             out.print(backend.state);
1177         }
1178         out.println("</TD>");
1179         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1180         if (backend.job != null) {
1181             printJobNumberLink(out, backend.job.jobnum);
1182         } else {
1183             out.print("&nbsp;");
1184         }
1185         out.println("</TD>");
1186         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1187         if (backend.job != null) {
1188             printDeltaTime(out, now, backend.job.stateTime);
1189         } else {
1190             out.print("&nbsp;");
1191         }
1192         out.println("</TD>");
1193         out.println("</TR>");
1194     }
1195 
1196     /**
1197      * Print the job labels on the given print writer.
1198      *
1199      * @param out Print writer.
1200      */
1201     private void printJobLabels(PrintWriter out) {
1202         out.println("<TR BGCOLOR=\"#E8E8E8\">");
1203         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1204         out.print("<I>Job</I>");
1205         out.println("</TD>");
1206         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1207         out.print("<I>User</I>");
1208         out.println("</TD>");
1209         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1210         out.print("<I>nn</I>");
1211         out.println("</TD>");
1212         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1213         out.print("<I>np</I>");
1214         out.println("</TD>");
1215         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1216         out.print("<I>nt</I>");
1217         out.println("</TD>");
1218         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1219         out.print("<I>Rank</I>");
1220         out.println("</TD>");
1221         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1222         out.print("<I>Node</I>");
1223         out.println("</TD>");
1224         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1225         out.print("<I>CPUs</I>");
1226         out.println("</TD>");
1227         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1228         out.print("<I>Status</I>");
1229         out.println("</TD>");
1230         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1231         out.print("<I>Time</I>");
1232         out.println("</TD>");
1233         out.println("</TR>");
1234     }
1235 
1236     /**
1237      * Print the given job info on the given print writer.
1238      *
1239      * @param out Print writer.
1240      * @param now Current time.
1241      * @param job Job info.
1242      * @param i Even = white background, odd = gray background.
1243      */
1244     private void printJobInfo(PrintWriter out,
1245             long now,
1246             JobInfo job,
1247             int i) {
1248         boolean first;
1249         out.print("<TR BGCOLOR=\"#");
1250         out.print(i % 2 == 0 ? "FFFFFF" : "E8E8E8");
1251         out.println("\">");
1252         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1253         printJobNumberLink(out, job.jobnum);
1254         out.println("</TD>");
1255         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1256         out.print(job.username);
1257         out.println("</TD>");
1258         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1259         out.print(job.Nn);
1260         out.println("</TD>");
1261         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1262         out.print(job.Np);
1263         out.println("</TD>");
1264         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1265         out.print(job.Nt == 0 ? "all" : "" + job.Nt);
1266         out.println("</TD>");
1267         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1268         if (job.count == 0) {
1269             out.print("&nbsp;");
1270         } else {
1271             for (int j = 0; j < job.count; ++j) {
1272                 if (j > 0) {
1273                     out.print("<BR>");
1274                 }
1275                 out.print(j);
1276             }
1277         }
1278         out.println("</TD>");
1279         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1280         if (job.count == 0) {
1281             out.print("&nbsp;");
1282         } else {
1283             for (int j = 0; j < job.count; ++j) {
1284                 if (j > 0) {
1285                     out.print("<BR>");
1286                 }
1287                 out.print(job.backend[j].name);
1288             }
1289         }
1290         out.println("</TD>");
1291         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1292         if (job.count == 0) {
1293             out.print("&nbsp;");
1294         } else {
1295             for (int j = 0; j < job.count; ++j) {
1296                 if (j > 0) {
1297                     out.print("<BR>");
1298                 }
1299                 out.print(job.cpus[j]);
1300             }
1301         }
1302         out.println("</TD>");
1303         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1304         out.print(job.state);
1305         out.println("</TD>");
1306         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1307         printDeltaTime(out, now, job.stateTime);
1308         out.println("</TD>");
1309         out.println("</TR>");
1310     }
1311 
1312     /**
1313      * Print the start of the debug HTML document on the given print writer.
1314      *
1315      * @param out Print writer.
1316      */
1317     private void printDebugHtmlStart(PrintWriter out,
1318             long now) {
1319         out.println("<HTML>");
1320         out.println("<HEAD>");
1321         out.print("<TITLE>");
1322         out.print(myClusterName);
1323         out.println("</TITLE>");
1324         out.println("<STYLE TYPE=\"text/css\">");
1325         out.println("<!--");
1326         out.println("* {font-family: Arial, Helvetica, Sans-Serif;}");
1327         out.println("body {font-size: small;}");
1328         out.println("h1 {font-size: 140%; font-weight: bold;}");
1329         out.println("table {font-size: 100%;}");
1330         out.println("-->");
1331         out.println("</STYLE>");
1332         out.println("</HEAD>");
1333         out.println("<BODY>");
1334         out.print("<H1>");
1335         out.print(myClusterName);
1336         out.println("</H1>");
1337         out.println("<P>");
1338         out.print(new Date(now));
1339         out.print(" -- ");
1340         out.print(Version.PJ_VERSION);
1341         out.println("</P>");
1342     }
1343 
1344     /**
1345      * Print the body of the debug HTML document on the given print writer.
1346      *
1347      * @param out Print writer.
1348      */
1349     private void printDebugHtmlBody(PrintWriter out) {
1350         out.println("<P>");
1351         out.println("<HR/>");
1352         out.println("<H3>Thread Dump</H3>");
1353         out.println("</P>");
1354         Map<Thread, StackTraceElement[]> traces = Thread.getAllStackTraces();
1355         for (Map.Entry<Thread, StackTraceElement[]> entry : traces.entrySet()) {
1356             Thread thread = entry.getKey();
1357             out.println("<P>");
1358             out.print("Name: ");
1359             out.print(thread.getName());
1360             /// out.println("&nbsp;&nbsp;&nbsp;&nbsp;");
1361             /// out.print(" ID: ");
1362             /// out.print(thread.getId());
1363             out.println("&nbsp;&nbsp;&nbsp;&nbsp;");
1364             out.print(" Daemon: ");
1365             out.print(thread.isDaemon() ? "yes" : "no");
1366             out.println("&nbsp;&nbsp;&nbsp;&nbsp;");
1367             out.print(" State: ");
1368             out.print(thread.getState());
1369             out.println("&nbsp;&nbsp;&nbsp;&nbsp;");
1370             out.print(" Priority: ");
1371             out.print(thread.getPriority());
1372             out.println("&nbsp;&nbsp;&nbsp;&nbsp;");
1373             out.print(" Thread Group: ");
1374             out.print(thread.getThreadGroup().getName());
1375             out.println();
1376             for (StackTraceElement element : entry.getValue()) {
1377                 out.print("<BR/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
1378                 out.println(element);
1379             }
1380             out.println("</P>");
1381         }
1382         out.println("<P>");
1383         out.println("<HR/>");
1384         out.println("</P>");
1385     }
1386 
1387     /**
1388      * Print the start of the detailed job status HTML document on the given
1389      * print writer.
1390      *
1391      * @param out Print writer.
1392      * @param now Current time.
1393      * @param jobNum Job number.
1394      */
1395     private void printJobDetailHtmlStart(PrintWriter out,
1396             long now,
1397             int jobNum) {
1398         out.println("<HTML>");
1399         out.println("<HEAD>");
1400         out.print("<TITLE>");
1401         out.print(myClusterName);
1402         out.println("</TITLE>");
1403         out.print("<META HTTP-EQUIV=\"refresh\" CONTENT=\"20;url=");
1404         printJobNumberURL(out, jobNum);
1405         out.println("\">");
1406         out.println("<STYLE TYPE=\"text/css\">");
1407         out.println("<!--");
1408         out.println("* {font-family: Arial, Helvetica, Sans-Serif;}");
1409         out.println("body {font-size: small;}");
1410         out.println("h1 {font-size: 140%; font-weight: bold;}");
1411         out.println("table {font-size: 100%;}");
1412         out.println("-->");
1413         out.println("</STYLE>");
1414         out.println("</HEAD>");
1415         out.println("<BODY>");
1416         out.print("<H1>");
1417         out.print(myClusterName);
1418         out.println("</H1>");
1419         out.println("<P>");
1420         out.print("<FORM ACTION=\"");
1421         printJobNumberURL(out, jobNum);
1422         out.println("\" METHOD=\"get\">");
1423         out.println("<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0>");
1424         out.println("<TR>");
1425         out.print("<TD ALIGN=\"left\" VALIGN=\"center\">");
1426         out.print("<INPUT TYPE=\"submit\" VALUE=\"Refresh\">");
1427         out.println("</TD>");
1428         out.println("<TD WIDTH=20> </TD>");
1429         out.print("<TD ALIGN=\"left\" VALIGN=\"center\">");
1430         out.print(new Date(now));
1431         out.print(" -- ");
1432         out.print(Version.PJ_VERSION);
1433         out.println("</TD>");
1434         out.println("</TR>");
1435         out.println("</TABLE>");
1436         out.println("</FORM>");
1437     }
1438 
1439     /**
1440      * Print the body of the detailed job status HTML document on the given
1441      * print writer.
1442      *
1443      * @param out Print writer.
1444      * @param now Current time.
1445      * @param jobNum Job number.
1446      */
1447     private synchronized void printJobDetailHtmlBody(PrintWriter out,
1448             long now,
1449             int jobNum) {
1450         JobInfo jobInfo = null;
1451 
1452         // Find job info.
1453         for (JobInfo job : myRunningJobList) {
1454             if (job.jobnum == jobNum) {
1455                 jobInfo = job;
1456                 break;
1457             }
1458         }
1459         if (jobInfo == null) {
1460             for (JobInfo job : myWaitingJobList) {
1461                 if (job.jobnum == jobNum) {
1462                     jobInfo = job;
1463                     break;
1464                 }
1465             }
1466         }
1467 
1468         out.println("<P>");
1469         out.println("<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0>");
1470         out.println("<TR>");
1471         out.println("<TD ALIGN=\"left\" VALIGN=\"top\"><B>Job:</B></TD>");
1472         out.println("<TD WIDTH=10> </TD>");
1473         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\"><B>%d</B></TD>",
1474                 jobNum);
1475         out.println("</TR>");
1476         out.println("<TR>");
1477         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">User:</TD>");
1478         out.println("<TD WIDTH=10> </TD>");
1479         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s</TD>",
1480                 jobInfo == null ? " " : jobInfo.username);
1481         out.println("</TR>");
1482         out.println("<TR>");
1483         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">Nodes (nn):</TD>");
1484         out.println("<TD WIDTH=10> </TD>");
1485         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s</TD>",
1486                 jobInfo == null ? " " : "" + jobInfo.Nn);
1487         out.println("</TR>");
1488         out.println("<TR>");
1489         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">Processes (np):</TD>");
1490         out.println("<TD WIDTH=10> </TD>");
1491         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s</TD>",
1492                 jobInfo == null ? " " : "" + jobInfo.Np);
1493         out.println("</TR>");
1494         out.println("<TR>");
1495         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">Threads (nt):</TD>");
1496         out.println("<TD WIDTH=10> </TD>");
1497         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s</TD>",
1498                 jobInfo == null ? " " : jobInfo.Nt == 0 ? "All" : "" + jobInfo.Nt);
1499         out.println("</TR>");
1500         out.println("<TR>");
1501         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">Status:</TD>");
1502         out.println("<TD WIDTH=10> </TD>");
1503         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s</TD>",
1504                 jobInfo == null ? "Not in queue" : jobInfo.state);
1505         out.println("</TR>");
1506         out.println("<TR>");
1507         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">Time:</TD>");
1508         out.println("<TD WIDTH=10> </TD>");
1509         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1510         if (jobInfo == null) {
1511             out.print(" ");
1512         } else {
1513             printDeltaTime(out, now, jobInfo.stateTime);
1514         }
1515         out.println("</TD>");
1516         out.println("</TR>");
1517         out.println("</TABLE>");
1518         out.println("</P>");
1519 
1520         if (jobInfo == null || jobInfo.count == 0) {
1521             return;
1522         }
1523 
1524         out.println("<P>");
1525         out.println("<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0>");
1526         out.println("<TR>");
1527         out.println("<TD ALIGN=\"center\" VALIGN=\"top\">");
1528 
1529         out.println("Processes");
1530         out.println("<TABLE BORDER=1 CELLPADDING=3 CELLSPACING=0>");
1531         out.println("<TR>");
1532         out.println("<TD ALIGN=\"left\" VALIGN=\"top\">");
1533 
1534         out.println("<TABLE BORDER=0 CELLPADDING=3 CELLSPACING=0>");
1535         printJobDetailProcessLabels(out);
1536         for (int i = 0; i < jobInfo.count; ++i) {
1537             printJobDetailProcessInfo(out, jobInfo, i);
1538         }
1539         out.println("</TABLE>");
1540 
1541         out.println("</TD>");
1542         out.println("</TR>");
1543         out.println("</TABLE>");
1544 
1545         out.println("</TD>");
1546         out.println("</TR>");
1547         out.println("</TABLE>");
1548         out.println("</P>");
1549     }
1550 
1551     /**
1552      * Print the detailed job status process labels on the given print writer.
1553      *
1554      * @param out Print writer.
1555      */
1556     private void printJobDetailProcessLabels(PrintWriter out) {
1557         out.println("<TR BGCOLOR=\"#E8E8E8\">");
1558         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1559         out.print("<I>Rank</I>");
1560         out.println("</TD>");
1561         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1562         out.print("<I>Node</I>");
1563         out.println("</TD>");
1564         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1565         out.print("<I>CPUs</I>");
1566         out.println("</TD>");
1567         out.print("<TD ALIGN=\"left\" VALIGN=\"top\">");
1568         out.print("<I>Comment</I>");
1569         out.println("</TD>");
1570         out.println("</TR>");
1571     }
1572 
1573     /**
1574      * Print the detailed job status process information on the given print
1575      * writer.
1576      *
1577      * @param out Print writer.
1578      * @param jobInfo Job info.
1579      * @param rank Process rank.
1580      */
1581     private void printJobDetailProcessInfo(PrintWriter out,
1582             JobInfo jobInfo,
1583             int rank) {
1584         out.printf("<TR BGCOLOR=\"#%s\">\n",
1585                 rank % 2 == 0 ? "FFFFFF" : "E8E8E8");
1586         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%d&nbsp;&nbsp;</TD>\n",
1587                 rank);
1588         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s&nbsp;&nbsp;</TD>\n",
1589                 jobInfo.backend[rank].name);
1590         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%d&nbsp;&nbsp;</TD>\n",
1591                 jobInfo.cpus[rank]);
1592         out.printf("<TD ALIGN=\"left\" VALIGN=\"top\">%s</TD>\n",
1593                 jobInfo.comment[rank]);
1594         out.println("</TR>");
1595     }
1596 
1597     /**
1598      * Print the start of the error HTML document on the given print writer.
1599      *
1600      * @param out Print writer.
1601      */
1602     private void printErrorHtmlStart(PrintWriter out) {
1603         out.println("<HTML>");
1604         out.println("<HEAD>");
1605         out.print("<TITLE>");
1606         out.print(myClusterName);
1607         out.println("</TITLE>");
1608         out.println("<STYLE TYPE=\"text/css\">");
1609         out.println("<!--");
1610         out.println("* {font-family: Arial, Helvetica, Sans-Serif;}");
1611         out.println("body {font-size: small;}");
1612         out.println("h1 {font-size: 140%; font-weight: bold;}");
1613         out.println("table {font-size: 100%;}");
1614         out.println("-->");
1615         out.println("</STYLE>");
1616         out.println("</HEAD>");
1617         out.println("<BODY>");
1618     }
1619 
1620     /**
1621      * Print the end of the error HTML document on the given print writer.
1622      *
1623      * @param out Print writer.
1624      */
1625     private void printErrorHtmlEnd(PrintWriter out) {
1626         out.println("</BODY>");
1627         out.println("</HTML>");
1628     }
1629 
1630     /**
1631      * Shut down this Job Scheduler.
1632      */
1633     private void shutdown() {
1634         if (myChannelGroup != null) {
1635             myChannelGroup.close();
1636         }
1637         if (myHttpServer != null) {
1638             try {
1639                 myHttpServer.close();
1640             } catch (IOException ignored) {
1641             }
1642         }
1643         myLog.log("Stopped");
1644     }
1645 
1646 // Main program.
1647     /**
1648      * Job Scheduler main program.
1649      *
1650      * @param args an array of {@link java.lang.String} objects.
1651      * @throws java.lang.Exception if any.
1652      */
1653     public static void main(String[] args)
1654             throws Exception {
1655         if (args.length != 1) {
1656             System.err.println("Usage: java edu.rit.pj.cluster.JobScheduler <configfile>");
1657             System.exit(1);
1658         }
1659 
1660         JobScheduler scheduler = new JobScheduler(args[0]);
1661         scheduler.run();
1662     }
1663 
1664 }