1 //******************************************************************************
2 //
3 // File: LongForLoop.java
4 // Package: edu.rit.pj
5 // Unit: Class edu.rit.pj.LongForLoop
6 //
7 // This Java source file is copyright (C) 2007 by Alan Kaminsky. All rights
8 // reserved. For further information, contact the author, Alan Kaminsky, at
9 // ark@cs.rit.edu.
10 //
11 // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
12 // software; you can redistribute it and/or modify it under the terms of the GNU
13 // General Public License as published by the Free Software Foundation; either
14 // version 3 of the License, or (at your option) any later version.
15 //
16 // PJ is distributed in the hope that it will be useful, but WITHOUT ANY
17 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18 // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 //
20 // Linking this library statically or dynamically with other modules is making a
21 // combined work based on this library. Thus, the terms and conditions of the GNU
22 // General Public License cover the whole combination.
23 //
24 // As a special exception, the copyright holders of this library give you
25 // permission to link this library with independent modules to produce an
26 // executable, regardless of the license terms of these independent modules, and
27 // to copy and distribute the resulting executable under terms of your choice,
28 // provided that you also meet, for each linked independent module, the terms
29 // and conditions of the license of that module. An independent module is a module
30 // which is not derived from or based on this library. If you modify this library,
31 // you may extend this exception to your version of the library, but you are not
32 // obligated to do so. If you do not wish to do so, delete this exception
33 // statement from your version.
34 //
35 // A copy of the GNU General Public License is provided in the file gpl.txt. You
36 // may also obtain a copy of the GNU General Public License on the World Wide
37 // Web at http://www.gnu.org/licenses/gpl.html.
38 //
39 //******************************************************************************
40 package edu.rit.pj;
41
42 /**
43 * Class LongForLoop is the abstract base class for one variation of a parallel
44 * for loop that is executed inside a {@linkplain ParallelRegion}. The loop
45 * index data type is <code>long</code>. The loop stride is implicit (+1).
46 * <P>
47 * To execute a parallel for loop, create a {@linkplain ParallelRegion} object;
48 * create an instance of a concrete subclass of class LongForLoop; and pass this
49 * instance to the parallel region's <code>execute()</code> method. Either every
50 * parallel team thread must call the parallel region's <code>execute()</code>
51 * method with identical arguments, or every thread must not call the
52 * <code>execute()</code> method. You can do all this using an anonymous inner
53 * class; for example:
54 * <PRE>
55 * new ParallelRegion()
56 * {
57 * . . .
58 * public void run()
59 * {
60 * . . .
61 * execute (0L, 99L, new LongForLoop()
62 * {
63 * // Thread local variable declarations
64 * . . .
65 * public void start()
66 * {
67 * // Per-thread pre-loop initialization code
68 * . . .
69 * }
70 * public void run (long first, long last)
71 * {
72 * // Loop code
73 * . . .
74 * }
75 * public void finish()
76 * {
77 * // Per-thread post-loop finalization code
78 * . . .
79 * }
80 * });
81 * }
82 * . . .
83 * }
84 * </PRE>
85 * <P>
86 * The parallel region's <code>execute()</code> method does the following. Each
87 * parallel team thread calls the parallel for loop's <code>start()</code> method
88 * once before beginning any loop iterations. The range of loop indexes is
89 * divided into "chunks" and the chunks are apportioned among the threads, in a
90 * manner determined by the parallel for loop's schedule as returned by the
91 * <code>schedule()</code> method. Each thread repeatedly calls the parallel for
92 * loop's <code>run()</code> method, passing in a different chunk on each call,
93 * until all the chunks assigned to that thread have been performed. When a
94 * thread has finished calling <code>run()</code>, the thread calls the parallel for
95 * loop's <code>finish()</code> method. Then the thread waits at an implicit
96 * barrier. When all the threads have reached the barrier, the
97 * <code>execute()</code> method returns.
98 * <P>
99 * Note that each parallel team thread actually creates its own instance of the
100 * parallel for loop class and passes that instance to the parallel region's
101 * <code>execute()</code> method. Thus, any fields declared in the parallel for loop
102 * class will <I>not</I> be shared by all the threads, but instead will be
103 * private to each thread.
104 * <P>
105 * The <code>start()</code> method is intended for performing per-thread
106 * initialization before starting the loop iterations. If no such initialization
107 * is needed, omit the <code>start()</code> method.
108 * <P>
109 * The <code>run()</code> method contains the code for the loop. The first and last
110 * indexes for a chunk of loop iterations are passed in as arguments. The loop
111 * stride is implicit (+1). The parallel for loop's <code>run()</code> method must
112 * be coded this way:
113 * <PRE>
114 * public void run (long first, long last)
115 * {
116 * for (long i = first; i <= last; ++ i)
117 * {
118 * // Loop body code
119 * . . .
120 * }
121 * }
122 * </PRE> with the loop indexes running from <code>first</code> to <code>last</code>
123 * inclusive and increasing by +1 on each iteration.
124 * <P>
125 * The <code>finish()</code> method is intended for performing per-thread
126 * finalization after finishing the loop iterations. If no such finalization is
127 * needed, omit the <code>finish()</code> method.
128 * <P>
129 * Sometimes a portion of a parallel for loop has to be executed sequentially in
130 * the order of the loop indexes, while the rest of the parallel for loop can be
131 * executed concurrently. For example, the loop body is performing some
132 * computation that can be executed in parallel for different loop indexes, but
133 * the results of each computation must be written to a file sequentially in the
134 * order of the loop indexes. The <code>ordered()</code> method is provided for this
135 * purpose. A call to the <code>ordered()</code> method may appear once in the
136 * parallel for loop's <code>run()</code> method, like so:
137 * <PRE>
138 * public void run (long first, long last)
139 * {
140 * for (long i = first; i <= last; ++ i)
141 * {
142 * // This portion executed concurrently
143 * . . .
144 * ordered (new ParallelSection()
145 * {
146 * public void run()
147 * {
148 * // This portion executed sequentially
149 * // in the order of the loop indexes
150 * . . .
151 * }
152 * });
153 * // This portion executed concurrently again
154 * . . .
155 * }
156 * }
157 * </PRE> When called, the <code>ordered()</code> method waits until the
158 * <code>ordered()</code>
159 * method has been called and has returned in all loop iterations prior to the
160 * current loop iteration. Then the <code>ordered()</code> method calls the given
161 * parallel section's <code>run()</code> method. When the parallel section's
162 * <code>run()</code> method returns, the <code>ordered()</code> method returns. If the
163 * parallel section's <code>run()</code> method throws an exception, the
164 * <code>ordered()</code> method throws that same exception.
165 * <P>
166 * It is possible to stop a parallel for loop using the <code>stopLoop()</code>
167 * method, like this:
168 * <PRE>
169 * public void run (long first, long last)
170 * {
171 * for (long i = first; i <= last; ++ i)
172 * {
173 * // Loop body
174 * . . .
175 * if (/*time to stop the loop*/)
176 * {
177 * stopLoop();
178 * break;
179 * }
180 * // More loop body
181 * . . .
182 * }
183 * }
184 * </PRE> Once <code>stopLoop()</code> is called, after each parallel team thread
185 * finishes executing its current chunk of iterations, each thread will execute
186 * no further chunks and will proceed to finish the parallel for loop. Note well
187 * that stopping a parallel for loop is not the same as executing a
188 * <code>break</code> statement in a regular for loop. The parallel for loop does
189 * not stop until each thread, <I>including the thread that called
190 * <code>stopLoop()</code></I>, has finished its current <I>chunk</I> of iterations.
191 * Thus, depending on the parallel for loop's schedule, additional iterations
192 * may be executed after <code>stopLoop()</code> is called. (The <code>break</code>
193 * statement in the above example causes the thread that called
194 * <code>stopLoop()</code> to finish its chunk of iterations early.)
195 * <P>
196 * Normally, at the end of the parallel for loop, the parallel team threads wait
197 * for each other at a barrier. To eliminate this barrier wait, include
198 * {@link edu.rit.pj.BarrierAction#NO_WAIT BarrierAction.NO_WAIT} in the <code>execute()</code>
199 * method call:
200 * <PRE>
201 * new ParallelRegion()
202 * {
203 * . . .
204 * public void run()
205 * {
206 * . . .
207 * execute (0L, 99L, new LongForLoop()
208 * {
209 * . . .
210 * },
211 * BarrierAction.NO_WAIT);
212 * . . .
213 * }
214 * }
215 * </PRE> To execute a section of code in a single thread as part of the barrier
216 * synchronization, include an instance of class {@linkplain BarrierAction} in
217 * the <code>execute()</code> method call. The barrier action object's
218 * <code>run()</code> method contains the code to be executed in a single thread
219 * while the other threads wait:
220 * <PRE>
221 * new ParallelRegion()
222 * {
223 * . . .
224 * public void run()
225 * {
226 * . . .
227 * execute (0L, 99L, new LongForLoop()
228 * {
229 * . . .
230 * },
231 * new BarrierAction()
232 * {
233 * public void run()
234 * {
235 * // Single-threaded code goes here
236 * . . .
237 * }
238 * });
239 * . . .
240 * }
241 * }
242 * </PRE> For further information, see class {@linkplain BarrierAction}.
243 * <P>
244 * If the parallel for loop's <code>start()</code>, <code>run()</code>, or
245 * <code>finish()</code> method throws an exception in one of the threads, then that
246 * thread executes no further code in the loop, and the parallel region's
247 * <code>execute()</code> method throws that same exception in that thread.
248 * Furthermore, the other threads in the parallel team also execute no further
249 * code in the loop after finishing their current chunks. Thus, if one thread
250 * throws an exception, the whole parallel for loop exits with some (perhaps
251 * none) of the iterations unperformed.
252 *
253 * @author Alan Kaminsky
254 * @version 11-Nov-2007
255 */
256 public abstract class LongForLoop
257 extends ParallelForLoop {
258
259 // Hidden data members.
260 // Parallel for loop schedule.
261 LongSchedule mySchedule;
262
263 // Loop index for ordered() construct.
264 long myOrderedIndex;
265
266 // Exported constructors.
267 /**
268 * Construct a new parallel for loop.
269 */
270 public LongForLoop() {
271 super();
272 }
273
274 // Exported operations.
275 /**
276 * Determine this parallel for loop's schedule. The schedule determines how
277 * the loop iterations are apportioned among the parallel team threads. For
278 * further information, see class {@linkplain LongSchedule}.
279 * <P>
280 * The <code>schedule()</code> method may be overridden in a subclass to return
281 * the desired schedule. If not overridden, the default is a runtime
282 * schedule (see {@link edu.rit.pj.LongSchedule#runtime()}).
283 *
284 * @return Schedule for this parallel for loop.
285 */
286 public LongSchedule schedule() {
287 return LongSchedule.runtime();
288 }
289
290 /**
291 * Perform per-thread initialization actions before starting the loop
292 * iterations.
293 * <P>
294 * The <code>start()</code> method may be overridden in a subclass. If not
295 * overridden, the <code>start()</code> method does nothing.
296 *
297 * @exception Exception The <code>start()</code> method may throw any exception.
298 * @throws java.lang.Exception if any.
299 */
300 public void start()
301 throws Exception {
302 }
303
304 /**
305 * Execute one chunk of iterations of this parallel for loop. The
306 * <code>run()</code> method must perform the loop body for indexes
307 * <code>first</code> through <code>last</code> inclusive, increasing the loop index
308 * by +1 after each iteration.
309 * <P>
310 * The <code>run()</code> method must be overridden in a subclass.
311 *
312 * @param first First loop index.
313 * @param last Last loop index.
314 * @exception Exception The <code>run()</code> method may throw any exception.
315 * @throws java.lang.Exception if any.
316 */
317 public abstract void run(long first,
318 long last)
319 throws Exception;
320
321 /**
322 * Perform per-thread finalization actions after finishing the loop
323 * iterations.
324 * <P>
325 * The <code>finish()</code> method may be overridden in a subclass. If not
326 * overridden, the <code>finish()</code> method does nothing.
327 *
328 * @exception Exception The <code>finish()</code> method may throw any
329 * exception.
330 * @throws java.lang.Exception if any.
331 */
332 public void finish()
333 throws Exception {
334 }
335
336 /**
337 * Execute the given section of code in order of the loop indexes. A call to
338 * the <code>ordered()</code> method may appear in this parallel for loop's
339 * <code>run()</code> method. When called, the <code>ordered()</code> method waits
340 * until the <code>ordered()</code> method has been called and has returned in
341 * all loop iterations prior to the current loop iteration. Then the
342 * <code>ordered()</code> method calls the <code>run()</code> method of
343 * <code>theParallelSection</code>. When the parallel section's <code>run()</code>
344 * method returns, the <code>ordered()</code> method returns. If the parallel
345 * section's <code>run()</code> method throws an exception, the
346 * <code>ordered()</code> method throws that same exception.
347 * <P>
348 * The <code>ordered()</code> method is used when a portion of a parallel for
349 * loop has to be executed sequentially in the order of the loop indexes,
350 * while the rest of the parallel for loop can be executed concurrently.
351 * <P>
352 * <I>Note:</I> Either the <code>ordered()</code> method must be called exactly
353 * once during each call of the parallel for loop's <code>run()</code> method,
354 * or the <code>ordered()</code> method must not be called at all.
355 *
356 * @param theSection Parallel section to execute in order.
357 * @exception NullPointerException (unchecked exception) Thrown if
358 * <code>theSection</code> is null.
359 * @exception IllegalStateException (unchecked exception) Thrown if no
360 * parallel team is executing this parallel for loop.
361 * @exception Exception Thrown if <code>theSection</code>'s <code>run()</code>
362 * method throws an exception.
363 * @throws java.lang.Exception if any.
364 */
365 public final void ordered(ParallelSection theSection)
366 throws Exception {
367 // Verify preconditions.
368 if (theSection == null) {
369 throw new IllegalStateException("LongForLoop.ordered(): Parallel section is null");
370 }
371 if (myTeam == null) {
372 throw new IllegalStateException("LongForLoop.ordered(): No parallel team executing");
373 }
374
375 // Wait until the ordered() construct has finished for all previous
376 // iterations.
377 if (mySchedule.myOrderedIndex.get() != myOrderedIndex) {
378 Spinner spinner = new Spinner();
379 while (mySchedule.myOrderedIndex.get() != myOrderedIndex) {
380 spinner.spin();
381 }
382 }
383
384 // Execute parallel section. Propagate any exception.
385 theSection.myTeam = this.myTeam;
386 try {
387 theSection.run();
388 } finally {
389 theSection.myTeam = null;
390
391 // Notify that the ordered construct has finished for this
392 // iteration.
393 ++this.myOrderedIndex;
394 mySchedule.myOrderedIndex.set(this.myOrderedIndex);
395 }
396 }
397
398 /**
399 * Stop this parallel for loop. Once <code>stopLoop()</code> is called, after
400 * each parallel team thread finishes executing its current chunk of
401 * iterations, each thread will execute no further chunks and will proceed
402 * to finish this parallel for loop.
403 *
404 * @exception IllegalStateException (unchecked exception) Thrown if no
405 * parallel team is executing this parallel for loop.
406 */
407 public final void stopLoop() {
408 if (myTeam == null) {
409 throw new IllegalStateException("ParallelForLoop.stopLoop(): No parallel team executing");
410 }
411 mySchedule.myBreak = true;
412 }
413
414 // Hidden operations.
415 /**
416 * Execute one chunk of iterations of this parallel for loop. This method
417 * performs common processing, then calls the <code>run()</code> method.
418 *
419 * @param first First loop index.
420 * @param last Last loop index.
421 *
422 * @exception Exception This method may throw any exception.
423 */
424 void commonRun(long first,
425 long last)
426 throws Exception {
427 myOrderedIndex = first;
428 run(first, last);
429 }
430
431 // Kludge to avert false sharing in multithreaded programs.
432 // Padding fields.
433 volatile long p0 = 1000L;
434 volatile long p1 = 1001L;
435 volatile long p2 = 1002L;
436 volatile long p3 = 1003L;
437 volatile long p4 = 1004L;
438 volatile long p5 = 1005L;
439 volatile long p6 = 1006L;
440 volatile long p7 = 1007L;
441 volatile long p8 = 1008L;
442 volatile long p9 = 1009L;
443 volatile long pa = 1010L;
444 volatile long pb = 1011L;
445 volatile long pc = 1012L;
446 volatile long pd = 1013L;
447 volatile long pe = 1014L;
448 volatile long pf = 1015L;
449
450 // Method to prevent the JDK from optimizing away the padding fields.
451 long preventOptimization() {
452 return p0 + p1 + p2 + p3 + p4 + p5 + p6 + p7 +
453 p8 + p9 + pa + pb + pc + pd + pe + pf;
454 }
455
456 }