1 //******************************************************************************
2 //
3 // File: LongStrideForLoop.java
4 // Package: edu.rit.pj
5 // Unit: Class edu.rit.pj.LongStrideForLoop
6 //
7 // This Java source file is copyright (C) 2007 by Alan Kaminsky. All rights
8 // reserved. For further information, contact the author, Alan Kaminsky, at
9 // ark@cs.rit.edu.
10 //
11 // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
12 // software; you can redistribute it and/or modify it under the terms of the GNU
13 // General Public License as published by the Free Software Foundation; either
14 // version 3 of the License, or (at your option) any later version.
15 //
16 // PJ is distributed in the hope that it will be useful, but WITHOUT ANY
17 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18 // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 //
20 // Linking this library statically or dynamically with other modules is making a
21 // combined work based on this library. Thus, the terms and conditions of the GNU
22 // General Public License cover the whole combination.
23 //
24 // As a special exception, the copyright holders of this library give you
25 // permission to link this library with independent modules to produce an
26 // executable, regardless of the license terms of these independent modules, and
27 // to copy and distribute the resulting executable under terms of your choice,
28 // provided that you also meet, for each linked independent module, the terms
29 // and conditions of the license of that module. An independent module is a module
30 // which is not derived from or based on this library. If you modify this library,
31 // you may extend this exception to your version of the library, but you are not
32 // obligated to do so. If you do not wish to do so, delete this exception
33 // statement from your version.
34 //
35 // A copy of the GNU General Public License is provided in the file gpl.txt. You
36 // may also obtain a copy of the GNU General Public License on the World Wide
37 // Web at http://www.gnu.org/licenses/gpl.html.
38 //
39 //******************************************************************************
40 package edu.rit.pj;
41
42 /**
43 * Class LongStrideForLoop is the abstract base class for one variation of a
44 * parallel for loop that is executed inside a {@linkplain ParallelRegion}. The
45 * loop index data type is <code>long</code>. The loop stride is explicitly
46 * specified.
47 * <P>
48 * To execute a parallel for loop, create a {@linkplain ParallelRegion} object;
49 * create an instance of a concrete subclass of class LongStrideForLoop; and
50 * pass this instance to the parallel region's <code>execute()</code> method. Either
51 * every parallel team thread must call the parallel region's <code>execute()</code>
52 * method with identical arguments, or every thread must not call the
53 * <code>execute()</code> method. You can do all this using an anonymous inner
54 * class; for example:
55 * <PRE>
56 * new ParallelRegion()
57 * {
58 * . . .
59 * public void run()
60 * {
61 * . . .
62 * execute (0L, 98L, 2L, new LongStrideForLoop()
63 * {
64 * // Thread local variable declarations
65 * . . .
66 * public void start()
67 * {
68 * // Per-thread pre-loop initialization code
69 * . . .
70 * }
71 * public void run (long first, long last, long stride)
72 * {
73 * // Loop code
74 * . . .
75 * }
76 * public void finish()
77 * {
78 * // Per-thread post-loop finalization code
79 * . . .
80 * }
81 * });
82 * }
83 * . . .
84 * }
85 * </PRE>
86 * <P>
87 * The parallel region's <code>execute()</code> method does the following. Each
88 * parallel team thread calls the parallel for loop's <code>start()</code> method
89 * once before beginning any loop iterations. The range of loop indexes is
90 * divided into "chunks" and the chunks are apportioned among the threads, in a
91 * manner determined by the parallel for loop's schedule as returned by the
92 * <code>schedule()</code> method. Each thread repeatedly calls the parallel for
93 * loop's <code>run()</code> method, passing in a different chunk on each call,
94 * until all the chunks assigned to that thread have been performed. When a
95 * thread has finished calling <code>run()</code>, the thread calls the parallel for
96 * loop's <code>finish()</code> method. Then the thread waits at a barrier. When all
97 * the threads have reached the barrier, the <code>execute()</code> method returns.
98 * <P>
99 * Note that each parallel team thread actually creates its own instance of the
100 * parallel for loop class and passes that instance to the parallel region's
101 * <code>execute()</code> method. Thus, any fields declared in the parallel for loop
102 * class will <I>not</I> be shared by all the threads, but instead will be
103 * private to each thread.
104 * <P>
105 * The <code>start()</code> method is intended for performing per-thread
106 * initialization before starting the loop iterations. If no such initialization
107 * is needed, omit the <code>start()</code> method.
108 * <P>
109 * The <code>run()</code> method contains the code for the loop. The first and last
110 * indexes for a chunk of loop iterations are passed in as arguments. The loop
111 * stride, which is always positive, is also explicitly specified as an
112 * argument. The parallel for loop's <code>run()</code> method must be coded this
113 * way:
114 * <PRE>
115 * public void run (long first, long last, long stride)
116 * {
117 * for (long i = first; i <= last; i += stride)
118 * {
119 * // Loop body code
120 * . . .
121 * }
122 * }
123 * </PRE> with the loop indexes running from <code>first</code> to <code>last</code>
124 * inclusive and increasing by <code>stride</code> on each iteration.
125 * <P>
126 * The <code>finish()</code> method is intended for performing per-thread
127 * finalization after finishing the loop iterations. If no such finalization is
128 * needed, omit the <code>finish()</code> method.
129 * <P>
130 * Sometimes a portion of a parallel for loop has to be executed sequentially in
131 * the order of the loop indexes, while the rest of the parallel for loop can be
132 * executed concurrently. For example, the loop body is performing some
133 * computation that can be executed in parallel for different loop indexes, but
134 * the results of each computation must be written to a file sequentially in the
135 * order of the loop indexes. The <code>ordered()</code> method is provided for this
136 * purpose. A call to the <code>ordered()</code> method may appear once in the
137 * parallel for loop's <code>run()</code> method, like so:
138 * <PRE>
139 * public void run (long first, long last, long stride)
140 * {
141 * for (long i = first; i <= last; i += stride)
142 * {
143 * // This portion executed concurrently
144 * . . .
145 * ordered (new ParallelSection()
146 * {
147 * public void run()
148 * {
149 * // This portion executed sequentially
150 * // in the order of the loop indexes
151 * . . .
152 * }
153 * });
154 * // This portion executed concurrently again
155 * . . .
156 * }
157 * }
158 * </PRE> When called, the <code>ordered()</code> method waits until the
159 * <code>ordered()</code>
160 * method has been called and has returned in all loop iterations prior to the
161 * current loop iteration. Then the <code>ordered()</code> method calls the given
162 * parallel section's <code>run()</code> method. When the parallel section's
163 * <code>run()</code> method returns, the <code>ordered()</code> method returns. If the
164 * parallel section's <code>run()</code> method throws an exception, the
165 * <code>ordered()</code> method throws that same exception.
166 * <P>
167 * It is possible to stop a parallel for loop using the <code>stopLoop()</code>
168 * method, like this:
169 * <PRE>
170 * public void run (long first, long last, long stride)
171 * {
172 * for (long i = first; i <= last; i += stride)
173 * {
174 * // Loop body
175 * . . .
176 * if (/*time to stop the loop*/)
177 * {
178 * stopLoop();
179 * break;
180 * }
181 * // More loop body
182 * . . .
183 * }
184 * }
185 * </PRE> Once <code>stopLoop()</code> is called, after each parallel team thread
186 * finishes executing its current chunk of iterations, each thread will execute
187 * no further chunks and will proceed to finish the parallel for loop. Note well
188 * that stopping a parallel for loop is not the same as executing a
189 * <code>break</code> statement in a regular for loop. The parallel for loop does
190 * not stop until each thread, <I>including the thread that called
191 * <code>stopLoop()</code></I>, has finished its current <I>chunk</I> of iterations.
192 * Thus, depending on the parallel for loop's schedule, additional iterations
193 * may be executed after <code>stopLoop()</code> is called. (The <code>break</code>
194 * statement in the above example causes the thread that called
195 * <code>stopLoop()</code> to finish its chunk of iterations early.)
196 * <P>
197 * Normally, at the end of the parallel for loop, the parallel team threads wait
198 * for each other at a barrier. To eliminate this barrier wait, include
199 * {@link edu.rit.pj.BarrierAction#NO_WAIT BarrierAction.NO_WAIT} in the <code>execute()</code>
200 * method call:
201 * <PRE>
202 * new ParallelRegion()
203 * {
204 * . . .
205 * public void run()
206 * {
207 * . . .
208 * execute (0L, 98L, 2L, new LongStrideForLoop()
209 * {
210 * . . .
211 * },
212 * BarrierAction.NO_WAIT);
213 * . . .
214 * }
215 * }
216 * </PRE> To execute a section of code in a single thread as part of the barrier
217 * synchronization, include an instance of class {@linkplain BarrierAction} in
218 * the <code>execute()</code> method call. The barrier action object's
219 * <code>run()</code> method contains the code to be executed in a single thread
220 * while the other threads wait:
221 * <PRE>
222 * new ParallelRegion()
223 * {
224 * . . .
225 * public void run()
226 * {
227 * . . .
228 * execute (0L, 98L, 2L, new LongStrideForLoop()
229 * {
230 * . . .
231 * },
232 * new BarrierAction()
233 * {
234 * public void run()
235 * {
236 * // Single-threaded code goes here
237 * . . .
238 * }
239 * });
240 * . . .
241 * }
242 * }
243 * </PRE> For further information, see class {@linkplain BarrierAction}.
244 * <P>
245 * If the parallel for loop's <code>start()</code>, <code>run()</code>, or
246 * <code>finish()</code> method throws an exception in one of the threads, then that
247 * thread executes no further code in the loop, and the parallel region's
248 * <code>execute()</code> method throws that same exception in that thread.
249 * Furthermore, the other threads in the parallel team also execute no further
250 * code in the loop after finishing their current chunks. Thus, if one thread
251 * throws an exception, the whole parallel for loop exits with some (perhaps
252 * none) of the iterations unperformed.
253 *
254 * @author Alan Kaminsky
255 * @version 11-Nov-2007
256 */
257 public abstract class LongStrideForLoop
258 extends ParallelForLoop {
259
260 // Hidden data members.
261 // Parallel for loop schedule.
262 LongSchedule mySchedule;
263
264 // Loop index and stride for ordered() construct.
265 long myOrderedIndex;
266 long myStride;
267
268 // Exported constructors.
269 /**
270 * Construct a new parallel for loop.
271 */
272 public LongStrideForLoop() {
273 super();
274 }
275
276 // Exported operations.
277 /**
278 * Determine this parallel for loop's schedule. The schedule determines how
279 * the loop iterations are apportioned among the parallel team threads. For
280 * further information, see class {@linkplain LongSchedule}.
281 * <P>
282 * The <code>schedule()</code> method may be overridden in a subclass to return
283 * the desired schedule. If not overridden, the default is a runtime
284 * schedule (see {@link edu.rit.pj.LongSchedule#runtime()}).
285 *
286 * @return Schedule for this parallel for loop.
287 */
288 public LongSchedule schedule() {
289 return LongSchedule.runtime();
290 }
291
292 /**
293 * Perform per-thread initialization actions before starting the loop
294 * iterations.
295 * <P>
296 * The <code>start()</code> method may be overridden in a subclass. If not
297 * overridden, the <code>start()</code> method does nothing.
298 *
299 * @exception Exception The <code>start()</code> method may throw any exception.
300 * @throws java.lang.Exception if any.
301 */
302 public void start()
303 throws Exception {
304 }
305
306 /**
307 * Execute one chunk of iterations of this parallel for loop. The
308 * <code>run()</code> method must perform the loop body for indexes
309 * <code>first</code> through <code>last</code> inclusive, increasing the loop index
310 * by <code>stride</code> after each iteration.
311 * <P>
312 * The <code>run()</code> method must be overridden in a subclass.
313 *
314 * @param first First loop index.
315 * @param last Last loop index.
316 * @param stride Loop index stride, always positive.
317 * @exception Exception The <code>run()</code> method may throw any exception.
318 * @throws java.lang.Exception if any.
319 */
320 public abstract void run(long first,
321 long last,
322 long stride)
323 throws Exception;
324
325 /**
326 * Perform per-thread finalization actions after finishing the loop
327 * iterations.
328 * <P>
329 * The <code>finish()</code> method may be overridden in a subclass. If not
330 * overridden, the <code>finish()</code> method does nothing.
331 *
332 * @exception Exception The <code>finish()</code> method may throw any
333 * exception.
334 * @throws java.lang.Exception if any.
335 */
336 public void finish()
337 throws Exception {
338 }
339
340 /**
341 * Execute the given section of code in order of the loop indexes. A call to
342 * the <code>ordered()</code> method may appear in this parallel for loop's
343 * <code>run()</code> method. When called, the <code>ordered()</code> method waits
344 * until the <code>ordered()</code> method has been called and has returned in
345 * all loop iterations prior to the current loop iteration. Then the
346 * <code>ordered()</code> method calls the <code>run()</code> method of
347 * <code>theParallelSection</code>. When the parallel section's <code>run()</code>
348 * method returns, the <code>ordered()</code> method returns. If the parallel
349 * section's <code>run()</code> method throws an exception, the
350 * <code>ordered()</code> method throws that same exception.
351 * <P>
352 * The <code>ordered()</code> method is used when a portion of a parallel for
353 * loop has to be executed sequentially in the order of the loop indexes,
354 * while the rest of the parallel for loop can be executed concurrently.
355 * <P>
356 * <I>Note:</I> Either the <code>ordered()</code> method must be called exactly
357 * once during each call of the parallel for loop's <code>run()</code> method,
358 * or the <code>ordered()</code> method must not be called at all.
359 *
360 * @param theSection Parallel section to execute in order.
361 * @exception NullPointerException (unchecked exception) Thrown if
362 * <code>theSection</code> is null.
363 * @exception IllegalStateException (unchecked exception) Thrown if no
364 * parallel team is executing this parallel for loop.
365 * @exception Exception Thrown if <code>theSection</code>'s <code>run()</code>
366 * method throws an exception.
367 * @throws java.lang.Exception if any.
368 */
369 public final void ordered(ParallelSection theSection)
370 throws Exception {
371 // Verify preconditions.
372 if (theSection == null) {
373 throw new IllegalStateException("LongStrideForLoop.ordered(): Parallel section is null");
374 }
375 if (myTeam == null) {
376 throw new IllegalStateException("LongStrideForLoop.ordered(): No parallel team executing");
377 }
378
379 // Wait until the ordered() construct has finished for all previous
380 // iterations.
381 if (mySchedule.myOrderedIndex.get() != myOrderedIndex) {
382 Spinner spinner = new Spinner();
383 while (mySchedule.myOrderedIndex.get() != myOrderedIndex) {
384 spinner.spin();
385 }
386 }
387
388 // Execute parallel section. Propagate any exception.
389 theSection.myTeam = this.myTeam;
390 try {
391 theSection.run();
392 } finally {
393 theSection.myTeam = null;
394
395 // Notify that the ordered construct has finished for this
396 // iteration.
397 this.myOrderedIndex += this.myStride;
398 mySchedule.myOrderedIndex.set(this.myOrderedIndex);
399 }
400 }
401
402 /**
403 * Stop this parallel for loop. Once <code>stopLoop()</code> is called, after
404 * each parallel team thread finishes executing its current chunk of
405 * iterations, each thread will execute no further chunks and will proceed
406 * to finish this parallel for loop.
407 *
408 * @exception IllegalStateException (unchecked exception) Thrown if no
409 * parallel team is executing this parallel for loop.
410 */
411 public final void stopLoop() {
412 if (myTeam == null) {
413 throw new IllegalStateException("ParallelForLoop.stopLoop(): No parallel team executing");
414 }
415 mySchedule.myBreak = true;
416 }
417
418 // Hidden operations.
419 /**
420 * Execute one chunk of iterations of this parallel for loop. This method
421 * performs common processing, then calls the <code>run()</code> method.
422 *
423 * @param first First loop index.
424 * @param last Last loop index.
425 * @param stride Loop index stride, always positive.
426 *
427 * @exception Exception This method may throw any exception.
428 */
429 void commonRun(long first,
430 long last,
431 long stride)
432 throws Exception {
433 myOrderedIndex = first;
434 myStride = stride;
435 run(first, last, stride);
436 }
437
438 // Kludge to avert false sharing in multithreaded programs.
439 // Padding fields.
440 volatile long p0 = 1000L;
441 volatile long p1 = 1001L;
442 volatile long p2 = 1002L;
443 volatile long p3 = 1003L;
444 volatile long p4 = 1004L;
445 volatile long p5 = 1005L;
446 volatile long p6 = 1006L;
447 volatile long p7 = 1007L;
448 volatile long p8 = 1008L;
449 volatile long p9 = 1009L;
450 volatile long pa = 1010L;
451 volatile long pb = 1011L;
452 volatile long pc = 1012L;
453 volatile long pd = 1013L;
454 volatile long pe = 1014L;
455 volatile long pf = 1015L;
456
457 // Method to prevent the JDK from optimizing away the padding fields.
458 long preventOptimization() {
459 return p0 + p1 + p2 + p3 + p4 + p5 + p6 + p7 +
460 p8 + p9 + pa + pb + pc + pd + pe + pf;
461 }
462
463 }