1 //******************************************************************************
2 //
3 // File: RandomSubset.java
4 // Package: edu.rit.util
5 // Unit: Class edu.rit.util.RandomSubset
6 //
7 // This Java source file is copyright (C) 2011 by Alan Kaminsky. All rights
8 // reserved. For further information, contact the author, Alan Kaminsky, at
9 // ark@cs.rit.edu.
10 //
11 // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
12 // software; you can redistribute it and/or modify it under the terms of the GNU
13 // General Public License as published by the Free Software Foundation; either
14 // version 3 of the License, or (at your option) any later version.
15 //
16 // PJ is distributed in the hope that it will be useful, but WITHOUT ANY
17 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18 // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 //
20 // Linking this library statically or dynamically with other modules is making a
21 // combined work based on this library. Thus, the terms and conditions of the GNU
22 // General Public License cover the whole combination.
23 //
24 // As a special exception, the copyright holders of this library give you
25 // permission to link this library with independent modules to produce an
26 // executable, regardless of the license terms of these independent modules, and
27 // to copy and distribute the resulting executable under terms of your choice,
28 // provided that you also meet, for each linked independent module, the terms
29 // and conditions of the license of that module. An independent module is a module
30 // which is not derived from or based on this library. If you modify this library,
31 // you may extend this exception to your version of the library, but you are not
32 // obligated to do so. If you do not wish to do so, delete this exception
33 // statement from your version.
34 //
35 // A copy of the GNU General Public License is provided in the file gpl.txt. You
36 // may also obtain a copy of the GNU General Public License on the World Wide
37 // Web at http://www.gnu.org/licenses/gpl.html.
38 //
39 //******************************************************************************
40 package edu.rit.util;
41
42 import java.util.HashMap;
43 import java.util.Iterator;
44 import java.util.Map;
45 import java.util.NoSuchElementException;
46
47 /**
48 * Class RandomSubset provides an object that generates a random subset of a set
49 * of integers. The original set consists of the integers from 0 to
50 * <I>N</I>−1 inclusive. The subset consists of integers chosen at random
51 * without replacement from the original set; each member of the original set is
52 * equally likely to be chosen. Class RandomSubset is an Iterator that visits
53 * the elements of the original set in a random order; each <code>next()</code>
54 * method call returns another element of the subset.
55 * <P>
56 * Calling the <code>remove(i)</code> method one or more times removes the given
57 * integers from the original set. Those integers will not be chosen for the
58 * random subset.
59 * <P>
60 * Class RandomSubset is layered on top of a pseudorandom number generator
61 * (PRNG), an instance of class {@linkplain edu.rit.util.Random Random}. Each
62 * time the <code>next()</code> method is called, one random number is consumed from
63 * the underlying PRNG.
64 * <P>
65 * Class RandomSubset has two different implementations with different storage
66 * requirements. The implementation is specified with a constructor argument.
67 * <UL>
68 *
69 * <LI>
70 * The <I>sparse</I> implementation requires less storage when the size of the
71 * random subset (the number of <code>next()</code> method calls) is a small
72 * fraction of the size of the original set (<I>N</I>).
73 *
74 * <LI>
75 * The <I>dense</I> implementation requires less storage when the size of the
76 * random subset is a large fraction of the size of the original set.
77 * </UL>
78 *
79 * @author Alan Kaminsky
80 * @version 22-Nov-2011
81 */
82 public class RandomSubset
83 implements Iterator<Integer> {
84
85 // Hidden data members.
86 // The underlying PRNG.
87 private Random prng;
88
89 // The size of the original set.
90 private int N;
91
92 // The number of random subset elements returned so far.
93 private int M;
94
95 // Helper class.
96 private Helper helper;
97
98 // Hidden helper classes.
99 // Helper abstract base class.
100 private abstract static class Helper {
101
102 // Returns the element in the permutation array at index i.
103 public abstract int getElement(int i);
104
105 // Sets the element in the permutation array at index i to the given
106 // value.
107 public abstract void setElement(int i, int value);
108
109 // Swaps the elements in the permutation array at indexes i and j.
110 public abstract void swapElements(int i, int j);
111
112 // Returns the index in the permutation array at which the given value
113 // resides.
114 public abstract int indexOf(int value);
115
116 // Restarts the iteration.
117 public abstract void restart();
118 }
119
120 // Sparse implementation helper class.
121 private static class SparseHelper
122 extends Helper {
123 // A sparse array containing a permutation of the integers from 0 to
124 // N-1. Implemented as a mapping from array index to array element. If
125 // an array index is not in the map, the corresponding array element is
126 // the same as the array index.
127
128 private final HashMap<Integer, Integer> permutation = new HashMap<>();
129
130 // Returns the element in the permutation array at index i.
131 public int getElement(int i) {
132 Integer element = permutation.get(i);
133 return element == null ? i : element;
134 }
135
136 // Sets the element in the permutation array at index i to the given
137 // value.
138 public void setElement(int i, int value) {
139 if (value == i) {
140 permutation.remove(i);
141 } else {
142 permutation.put(i, value);
143 }
144 }
145
146 // Swaps the elements in the permutation array at indexes i and j.
147 public void swapElements(int i, int j) {
148 int tmp = getElement(i);
149 setElement(i, getElement(j));
150 setElement(j, tmp);
151 }
152
153 // Returns the index in the permutation array at which the given value
154 // resides.
155 public int indexOf(int value) {
156 for (Map.Entry<Integer, Integer> entry : permutation.entrySet()) {
157 if (entry.getValue() == value) {
158 return entry.getKey();
159 }
160 }
161 return value;
162 }
163
164 // Restarts the iteration.
165 public void restart() {
166 permutation.clear();
167 }
168 }
169
170 // Dense implementation helper class.
171 private class DenseHelper
172 extends Helper {
173 // A dense array containing a permutation of the integers from 0 to
174 // N-1.
175
176 private int[] permutation = new int[N];
177
178 // Construct a new dense helper.
179 public DenseHelper() {
180 restart();
181 }
182
183 // Returns the element in the permutation array at index i.
184 public int getElement(int i) {
185 return permutation[i];
186 }
187
188 // Sets the element in the permutation array at index i to the given
189 // value.
190 public void setElement(int i,
191 int value) {
192 permutation[i] = value;
193 }
194
195 // Swaps the elements in the permutation array at indexes i and j.
196 public void swapElements(int i,
197 int j) {
198 int tmp = permutation[i];
199 permutation[i] = permutation[j];
200 permutation[j] = tmp;
201 }
202
203 // Returns the index in the permutation array at which the given value
204 // resides.
205 public int indexOf(int value) {
206 int i = 0;
207 while (permutation[i] != value) {
208 ++i;
209 }
210 return i;
211 }
212
213 // Restarts the iteration.
214 public void restart() {
215 for (int i = 0; i < N; ++i) {
216 permutation[i] = i;
217 }
218 }
219 }
220
221 // Exported constructors.
222 /**
223 * Construct a new random subset object for the original set consisting of
224 * the integers from 0 through <I>N</I>−1 inclusive. The sparse
225 * implementation is used.
226 *
227 * @param prng Underlying PRNG.
228 * @param N Size of original set.
229 * @exception NullPointerException (unchecked exception) Thrown if
230 * <code>prng</code> is null.
231 * @exception IllegalArgumentException (unchecked exception) Thrown if
232 * <I>N</I> < 0.
233 */
234 public RandomSubset(Random prng,
235 int N) {
236 this(prng, N, false);
237 }
238
239 /**
240 * Construct a new random subset object for the original set consisting of
241 * the integers from 0 through <I>N</I>−1 inclusive. If <code>dense</code>
242 * is false, the sparse implementation is used. If <code>dense</code> is true,
243 * the dense implementation is used.
244 *
245 * @param prng Underlying PRNG.
246 * @param N Size of original set.
247 * @param dense False to use the sparse implementation, true to use the
248 * dense implementation.
249 * @exception NullPointerException (unchecked exception) Thrown if
250 * <code>prng</code> is null.
251 * @exception IllegalArgumentException (unchecked exception) Thrown if
252 * <I>N</I> < 0.
253 */
254 public RandomSubset(Random prng,
255 int N,
256 boolean dense) {
257 if (prng == null) {
258 throw new NullPointerException("RandomSubset(): prng is null");
259 }
260 if (N < 0) {
261 throw new IllegalArgumentException("RandomSubset(): N = " + N + " illegal");
262 }
263 this.prng = prng;
264 this.N = N;
265 this.helper = dense ? new DenseHelper() : new SparseHelper();
266 }
267
268 // Exported operations.
269 /**
270 * Determine whether there are more integers in the random subset.
271 *
272 * @return True if there are more integers in the random subset, false if
273 * all the integers in the original set have been used up.
274 */
275 public boolean hasNext() {
276 return M < N;
277 }
278
279 /**
280 * Returns the next integer in the random subset.
281 *
282 * @return Next integer in the random subset.
283 * @exception NoSuchElementException (unchecked exception) Thrown if all the
284 * integers in the original set have been used up.
285 */
286 public Integer next() {
287 if (M >= N) {
288 throw new NoSuchElementException("RandomSubset.next(): No further elements");
289 }
290 helper.swapElements(M, M + prng.nextInt(N - M));
291 ++M;
292 return helper.getElement(M - 1);
293 }
294
295 /**
296 * Unsupported operation.
297 *
298 * @exception UnsupportedOperationException (unchecked exception) Thrown
299 * always.
300 */
301 public void remove() {
302 throw new UnsupportedOperationException();
303 }
304
305 /**
306 * Remove the given integer from the original set.
307 * <P>
308 * If <code>i</code> has already been removed from the original set, either by a
309 * <code>remove(i)</code> method call, or by a <code>next()</code> method call that
310 * returned <code>i</code>, then this method does nothing.
311 *
312 * @param i Integer to remove.
313 * @return This random subset object.
314 * @exception IllegalArgumentException (unchecked exception) Thrown if
315 * <code>i</code> is not in the range 0 through <I>N</I>−1 inclusive.
316 */
317 public RandomSubset remove(int i) {
318 if (0 > i || i >= N) {
319 throw new IllegalArgumentException("RandomSubset.remove(): i = " + i + " illegal");
320 }
321 int j = helper.indexOf(i);
322 if (j >= M) {
323 helper.swapElements(M, j);
324 ++M;
325 }
326 return this;
327 }
328
329 /**
330 * Restart this random subset's iteration. The original set is reset to
331 * contain all the integers from 0 through <I>N</I>−1 inclusive. If
332 * integers had been removed from the original set by calling the
333 * <code>remove(i)</code> method, those integers must be removed again by
334 * calling the <code>remove(i)</code> method again. The iteration is restarted,
335 * and calling the <code>next()</code> method will yield a different random
336 * subset.
337 * <P>
338 * The <code>restart()</code> method lets you generate multiple random subsets
339 * from the same original set using the same RandomSubset object. This
340 * avoids the multiple storage allocations that would take place when
341 * creating multiple RandomSubset objects. This in turn can reduce the
342 * running time, especially when <I>N</I> is large.
343 */
344 public void restart() {
345 M = 0;
346 helper.restart();
347 }
348
349 // Unit test main program.
350 // /**
351 // * Unit test main program.
352 // * <P>
353 // * Usage: java edu.rit.util.RandomSubset <I>impl</I> <I>seed</I> <I>N</I>
354 // * <I>M</I> [ <I>i</I> ... ]
355 // * <BR><I>impl</I> = "sparse" or "dense"
356 // * <BR><I>seed</I> = Random seed
357 // * <BR><I>N</I> = Size of original set
358 // * <BR><I>M</I> = Size of random subset
359 // * <BR><I>i</I> = Integer(s) to remove from original set
360 // */
361 // public static void main
362 // (String[] args)
363 // {
364 // if (args.length < 3) usage();
365 // boolean dense = args[0].equals ("dense");
366 // long seed = Long.parseLong (args[1]);
367 // int N = Integer.parseInt (args[2]);
368 // int M = Integer.parseInt (args[3]);
369 // RandomSubset rs =
370 // new RandomSubset (Random.getInstance (seed), N, dense);
371 // for (int r = 0; r < 4; ++ r)
372 // {
373 // rs.restart();
374 // for (int j = 3; j < args.length; ++ j)
375 // {
376 // rs.remove (Integer.parseInt (args[j]));
377 // }
378 // for (int j = 0; j < M; ++ j)
379 // {
380 // System.out.printf ("%d ", rs.next());
381 // }
382 // System.out.println();
383 // }
384 // }
385 //
386 // private static void usage()
387 // {
388 // System.err.println ("Usage: java edu.rit.util.RandomSubset <impl> <seed> <N> <M> [<i> ...]");
389 // System.err.println ("<impl> = \"sparse\" or \"dense\"");
390 // System.err.println ("<seed> = Random seed");
391 // System.err.println ("<N> = Size of original set");
392 // System.err.println ("<M> = Size of random subset");
393 // System.err.println ("<i> = Integer(s) to remove from original set");
394 // System.exit (1);
395 // }
396 }