1 //****************************************************************************** 2 // 3 // File: LongStrideForLoop.java 4 // Package: edu.rit.pj 5 // Unit: Class edu.rit.pj.LongStrideForLoop 6 // 7 // This Java source file is copyright (C) 2007 by Alan Kaminsky. All rights 8 // reserved. For further information, contact the author, Alan Kaminsky, at 9 // ark@cs.rit.edu. 10 // 11 // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free 12 // software; you can redistribute it and/or modify it under the terms of the GNU 13 // General Public License as published by the Free Software Foundation; either 14 // version 3 of the License, or (at your option) any later version. 15 // 16 // PJ is distributed in the hope that it will be useful, but WITHOUT ANY 17 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 18 // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 19 // 20 // Linking this library statically or dynamically with other modules is making a 21 // combined work based on this library. Thus, the terms and conditions of the GNU 22 // General Public License cover the whole combination. 23 // 24 // As a special exception, the copyright holders of this library give you 25 // permission to link this library with independent modules to produce an 26 // executable, regardless of the license terms of these independent modules, and 27 // to copy and distribute the resulting executable under terms of your choice, 28 // provided that you also meet, for each linked independent module, the terms 29 // and conditions of the license of that module. An independent module is a module 30 // which is not derived from or based on this library. If you modify this library, 31 // you may extend this exception to your version of the library, but you are not 32 // obligated to do so. If you do not wish to do so, delete this exception 33 // statement from your version. 34 // 35 // A copy of the GNU General Public License is provided in the file gpl.txt. You 36 // may also obtain a copy of the GNU General Public License on the World Wide 37 // Web at http://www.gnu.org/licenses/gpl.html. 38 // 39 //****************************************************************************** 40 package edu.rit.pj; 41 42 /** 43 * Class LongStrideForLoop is the abstract base class for one variation of a 44 * parallel for loop that is executed inside a {@linkplain ParallelRegion}. The 45 * loop index data type is <code>long</code>. The loop stride is explicitly 46 * specified. 47 * <P> 48 * To execute a parallel for loop, create a {@linkplain ParallelRegion} object; 49 * create an instance of a concrete subclass of class LongStrideForLoop; and 50 * pass this instance to the parallel region's <code>execute()</code> method. Either 51 * every parallel team thread must call the parallel region's <code>execute()</code> 52 * method with identical arguments, or every thread must not call the 53 * <code>execute()</code> method. You can do all this using an anonymous inner 54 * class; for example: 55 * <PRE> 56 * new ParallelRegion() 57 * { 58 * . . . 59 * public void run() 60 * { 61 * . . . 62 * execute (0L, 98L, 2L, new LongStrideForLoop() 63 * { 64 * // Thread local variable declarations 65 * . . . 66 * public void start() 67 * { 68 * // Per-thread pre-loop initialization code 69 * . . . 70 * } 71 * public void run (long first, long last, long stride) 72 * { 73 * // Loop code 74 * . . . 75 * } 76 * public void finish() 77 * { 78 * // Per-thread post-loop finalization code 79 * . . . 80 * } 81 * }); 82 * } 83 * . . . 84 * } 85 * </PRE> 86 * <P> 87 * The parallel region's <code>execute()</code> method does the following. Each 88 * parallel team thread calls the parallel for loop's <code>start()</code> method 89 * once before beginning any loop iterations. The range of loop indexes is 90 * divided into "chunks" and the chunks are apportioned among the threads, in a 91 * manner determined by the parallel for loop's schedule as returned by the 92 * <code>schedule()</code> method. Each thread repeatedly calls the parallel for 93 * loop's <code>run()</code> method, passing in a different chunk on each call, 94 * until all the chunks assigned to that thread have been performed. When a 95 * thread has finished calling <code>run()</code>, the thread calls the parallel for 96 * loop's <code>finish()</code> method. Then the thread waits at a barrier. When all 97 * the threads have reached the barrier, the <code>execute()</code> method returns. 98 * <P> 99 * Note that each parallel team thread actually creates its own instance of the 100 * parallel for loop class and passes that instance to the parallel region's 101 * <code>execute()</code> method. Thus, any fields declared in the parallel for loop 102 * class will <I>not</I> be shared by all the threads, but instead will be 103 * private to each thread. 104 * <P> 105 * The <code>start()</code> method is intended for performing per-thread 106 * initialization before starting the loop iterations. If no such initialization 107 * is needed, omit the <code>start()</code> method. 108 * <P> 109 * The <code>run()</code> method contains the code for the loop. The first and last 110 * indexes for a chunk of loop iterations are passed in as arguments. The loop 111 * stride, which is always positive, is also explicitly specified as an 112 * argument. The parallel for loop's <code>run()</code> method must be coded this 113 * way: 114 * <PRE> 115 * public void run (long first, long last, long stride) 116 * { 117 * for (long i = first; i <= last; i += stride) 118 * { 119 * // Loop body code 120 * . . . 121 * } 122 * } 123 * </PRE> with the loop indexes running from <code>first</code> to <code>last</code> 124 * inclusive and increasing by <code>stride</code> on each iteration. 125 * <P> 126 * The <code>finish()</code> method is intended for performing per-thread 127 * finalization after finishing the loop iterations. If no such finalization is 128 * needed, omit the <code>finish()</code> method. 129 * <P> 130 * Sometimes a portion of a parallel for loop has to be executed sequentially in 131 * the order of the loop indexes, while the rest of the parallel for loop can be 132 * executed concurrently. For example, the loop body is performing some 133 * computation that can be executed in parallel for different loop indexes, but 134 * the results of each computation must be written to a file sequentially in the 135 * order of the loop indexes. The <code>ordered()</code> method is provided for this 136 * purpose. A call to the <code>ordered()</code> method may appear once in the 137 * parallel for loop's <code>run()</code> method, like so: 138 * <PRE> 139 * public void run (long first, long last, long stride) 140 * { 141 * for (long i = first; i <= last; i += stride) 142 * { 143 * // This portion executed concurrently 144 * . . . 145 * ordered (new ParallelSection() 146 * { 147 * public void run() 148 * { 149 * // This portion executed sequentially 150 * // in the order of the loop indexes 151 * . . . 152 * } 153 * }); 154 * // This portion executed concurrently again 155 * . . . 156 * } 157 * } 158 * </PRE> When called, the <code>ordered()</code> method waits until the 159 * <code>ordered()</code> 160 * method has been called and has returned in all loop iterations prior to the 161 * current loop iteration. Then the <code>ordered()</code> method calls the given 162 * parallel section's <code>run()</code> method. When the parallel section's 163 * <code>run()</code> method returns, the <code>ordered()</code> method returns. If the 164 * parallel section's <code>run()</code> method throws an exception, the 165 * <code>ordered()</code> method throws that same exception. 166 * <P> 167 * It is possible to stop a parallel for loop using the <code>stopLoop()</code> 168 * method, like this: 169 * <PRE> 170 * public void run (long first, long last, long stride) 171 * { 172 * for (long i = first; i <= last; i += stride) 173 * { 174 * // Loop body 175 * . . . 176 * if (/*time to stop the loop*/) 177 * { 178 * stopLoop(); 179 * break; 180 * } 181 * // More loop body 182 * . . . 183 * } 184 * } 185 * </PRE> Once <code>stopLoop()</code> is called, after each parallel team thread 186 * finishes executing its current chunk of iterations, each thread will execute 187 * no further chunks and will proceed to finish the parallel for loop. Note well 188 * that stopping a parallel for loop is not the same as executing a 189 * <code>break</code> statement in a regular for loop. The parallel for loop does 190 * not stop until each thread, <I>including the thread that called 191 * <code>stopLoop()</code></I>, has finished its current <I>chunk</I> of iterations. 192 * Thus, depending on the parallel for loop's schedule, additional iterations 193 * may be executed after <code>stopLoop()</code> is called. (The <code>break</code> 194 * statement in the above example causes the thread that called 195 * <code>stopLoop()</code> to finish its chunk of iterations early.) 196 * <P> 197 * Normally, at the end of the parallel for loop, the parallel team threads wait 198 * for each other at a barrier. To eliminate this barrier wait, include 199 * {@link edu.rit.pj.BarrierAction#NO_WAIT BarrierAction.NO_WAIT} in the <code>execute()</code> 200 * method call: 201 * <PRE> 202 * new ParallelRegion() 203 * { 204 * . . . 205 * public void run() 206 * { 207 * . . . 208 * execute (0L, 98L, 2L, new LongStrideForLoop() 209 * { 210 * . . . 211 * }, 212 * BarrierAction.NO_WAIT); 213 * . . . 214 * } 215 * } 216 * </PRE> To execute a section of code in a single thread as part of the barrier 217 * synchronization, include an instance of class {@linkplain BarrierAction} in 218 * the <code>execute()</code> method call. The barrier action object's 219 * <code>run()</code> method contains the code to be executed in a single thread 220 * while the other threads wait: 221 * <PRE> 222 * new ParallelRegion() 223 * { 224 * . . . 225 * public void run() 226 * { 227 * . . . 228 * execute (0L, 98L, 2L, new LongStrideForLoop() 229 * { 230 * . . . 231 * }, 232 * new BarrierAction() 233 * { 234 * public void run() 235 * { 236 * // Single-threaded code goes here 237 * . . . 238 * } 239 * }); 240 * . . . 241 * } 242 * } 243 * </PRE> For further information, see class {@linkplain BarrierAction}. 244 * <P> 245 * If the parallel for loop's <code>start()</code>, <code>run()</code>, or 246 * <code>finish()</code> method throws an exception in one of the threads, then that 247 * thread executes no further code in the loop, and the parallel region's 248 * <code>execute()</code> method throws that same exception in that thread. 249 * Furthermore, the other threads in the parallel team also execute no further 250 * code in the loop after finishing their current chunks. Thus, if one thread 251 * throws an exception, the whole parallel for loop exits with some (perhaps 252 * none) of the iterations unperformed. 253 * 254 * @author Alan Kaminsky 255 * @version 11-Nov-2007 256 */ 257 public abstract class LongStrideForLoop 258 extends ParallelForLoop { 259 260 // Hidden data members. 261 // Parallel for loop schedule. 262 LongSchedule mySchedule; 263 264 // Loop index and stride for ordered() construct. 265 long myOrderedIndex; 266 long myStride; 267 268 // Exported constructors. 269 /** 270 * Construct a new parallel for loop. 271 */ 272 public LongStrideForLoop() { 273 super(); 274 } 275 276 // Exported operations. 277 /** 278 * Determine this parallel for loop's schedule. The schedule determines how 279 * the loop iterations are apportioned among the parallel team threads. For 280 * further information, see class {@linkplain LongSchedule}. 281 * <P> 282 * The <code>schedule()</code> method may be overridden in a subclass to return 283 * the desired schedule. If not overridden, the default is a runtime 284 * schedule (see {@link edu.rit.pj.LongSchedule#runtime()}). 285 * 286 * @return Schedule for this parallel for loop. 287 */ 288 public LongSchedule schedule() { 289 return LongSchedule.runtime(); 290 } 291 292 /** 293 * Perform per-thread initialization actions before starting the loop 294 * iterations. 295 * <P> 296 * The <code>start()</code> method may be overridden in a subclass. If not 297 * overridden, the <code>start()</code> method does nothing. 298 * 299 * @exception Exception The <code>start()</code> method may throw any exception. 300 * @throws java.lang.Exception if any. 301 */ 302 public void start() 303 throws Exception { 304 } 305 306 /** 307 * Execute one chunk of iterations of this parallel for loop. The 308 * <code>run()</code> method must perform the loop body for indexes 309 * <code>first</code> through <code>last</code> inclusive, increasing the loop index 310 * by <code>stride</code> after each iteration. 311 * <P> 312 * The <code>run()</code> method must be overridden in a subclass. 313 * 314 * @param first First loop index. 315 * @param last Last loop index. 316 * @param stride Loop index stride, always positive. 317 * @exception Exception The <code>run()</code> method may throw any exception. 318 * @throws java.lang.Exception if any. 319 */ 320 public abstract void run(long first, 321 long last, 322 long stride) 323 throws Exception; 324 325 /** 326 * Perform per-thread finalization actions after finishing the loop 327 * iterations. 328 * <P> 329 * The <code>finish()</code> method may be overridden in a subclass. If not 330 * overridden, the <code>finish()</code> method does nothing. 331 * 332 * @exception Exception The <code>finish()</code> method may throw any 333 * exception. 334 * @throws java.lang.Exception if any. 335 */ 336 public void finish() 337 throws Exception { 338 } 339 340 /** 341 * Execute the given section of code in order of the loop indexes. A call to 342 * the <code>ordered()</code> method may appear in this parallel for loop's 343 * <code>run()</code> method. When called, the <code>ordered()</code> method waits 344 * until the <code>ordered()</code> method has been called and has returned in 345 * all loop iterations prior to the current loop iteration. Then the 346 * <code>ordered()</code> method calls the <code>run()</code> method of 347 * <code>theParallelSection</code>. When the parallel section's <code>run()</code> 348 * method returns, the <code>ordered()</code> method returns. If the parallel 349 * section's <code>run()</code> method throws an exception, the 350 * <code>ordered()</code> method throws that same exception. 351 * <P> 352 * The <code>ordered()</code> method is used when a portion of a parallel for 353 * loop has to be executed sequentially in the order of the loop indexes, 354 * while the rest of the parallel for loop can be executed concurrently. 355 * <P> 356 * <I>Note:</I> Either the <code>ordered()</code> method must be called exactly 357 * once during each call of the parallel for loop's <code>run()</code> method, 358 * or the <code>ordered()</code> method must not be called at all. 359 * 360 * @param theSection Parallel section to execute in order. 361 * @exception NullPointerException (unchecked exception) Thrown if 362 * <code>theSection</code> is null. 363 * @exception IllegalStateException (unchecked exception) Thrown if no 364 * parallel team is executing this parallel for loop. 365 * @exception Exception Thrown if <code>theSection</code>'s <code>run()</code> 366 * method throws an exception. 367 * @throws java.lang.Exception if any. 368 */ 369 public final void ordered(ParallelSection theSection) 370 throws Exception { 371 // Verify preconditions. 372 if (theSection == null) { 373 throw new IllegalStateException("LongStrideForLoop.ordered(): Parallel section is null"); 374 } 375 if (myTeam == null) { 376 throw new IllegalStateException("LongStrideForLoop.ordered(): No parallel team executing"); 377 } 378 379 // Wait until the ordered() construct has finished for all previous 380 // iterations. 381 if (mySchedule.myOrderedIndex.get() != myOrderedIndex) { 382 Spinner spinner = new Spinner(); 383 while (mySchedule.myOrderedIndex.get() != myOrderedIndex) { 384 spinner.spin(); 385 } 386 } 387 388 // Execute parallel section. Propagate any exception. 389 theSection.myTeam = this.myTeam; 390 try { 391 theSection.run(); 392 } finally { 393 theSection.myTeam = null; 394 395 // Notify that the ordered construct has finished for this 396 // iteration. 397 this.myOrderedIndex += this.myStride; 398 mySchedule.myOrderedIndex.set(this.myOrderedIndex); 399 } 400 } 401 402 /** 403 * Stop this parallel for loop. Once <code>stopLoop()</code> is called, after 404 * each parallel team thread finishes executing its current chunk of 405 * iterations, each thread will execute no further chunks and will proceed 406 * to finish this parallel for loop. 407 * 408 * @exception IllegalStateException (unchecked exception) Thrown if no 409 * parallel team is executing this parallel for loop. 410 */ 411 public final void stopLoop() { 412 if (myTeam == null) { 413 throw new IllegalStateException("ParallelForLoop.stopLoop(): No parallel team executing"); 414 } 415 mySchedule.myBreak = true; 416 } 417 418 // Hidden operations. 419 /** 420 * Execute one chunk of iterations of this parallel for loop. This method 421 * performs common processing, then calls the <code>run()</code> method. 422 * 423 * @param first First loop index. 424 * @param last Last loop index. 425 * @param stride Loop index stride, always positive. 426 * 427 * @exception Exception This method may throw any exception. 428 */ 429 void commonRun(long first, 430 long last, 431 long stride) 432 throws Exception { 433 myOrderedIndex = first; 434 myStride = stride; 435 run(first, last, stride); 436 } 437 438 // Kludge to avert false sharing in multithreaded programs. 439 // Padding fields. 440 volatile long p0 = 1000L; 441 volatile long p1 = 1001L; 442 volatile long p2 = 1002L; 443 volatile long p3 = 1003L; 444 volatile long p4 = 1004L; 445 volatile long p5 = 1005L; 446 volatile long p6 = 1006L; 447 volatile long p7 = 1007L; 448 volatile long p8 = 1008L; 449 volatile long p9 = 1009L; 450 volatile long pa = 1010L; 451 volatile long pb = 1011L; 452 volatile long pc = 1012L; 453 volatile long pd = 1013L; 454 volatile long pe = 1014L; 455 volatile long pf = 1015L; 456 457 // Method to prevent the JDK from optimizing away the padding fields. 458 long preventOptimization() { 459 return p0 + p1 + p2 + p3 + p4 + p5 + p6 + p7 + 460 p8 + p9 + pa + pb + pc + pd + pe + pf; 461 } 462 463 }