View Javadoc
1   //******************************************************************************
2   //
3   // File:    RandomSubset.java
4   // Package: edu.rit.util
5   // Unit:    Class edu.rit.util.RandomSubset
6   //
7   // This Java source file is copyright (C) 2011 by Alan Kaminsky. All rights
8   // reserved. For further information, contact the author, Alan Kaminsky, at
9   // ark@cs.rit.edu.
10  //
11  // This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
12  // software; you can redistribute it and/or modify it under the terms of the GNU
13  // General Public License as published by the Free Software Foundation; either
14  // version 3 of the License, or (at your option) any later version.
15  //
16  // PJ is distributed in the hope that it will be useful, but WITHOUT ANY
17  // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18  // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19  //
20  // Linking this library statically or dynamically with other modules is making a
21  // combined work based on this library. Thus, the terms and conditions of the GNU
22  // General Public License cover the whole combination.
23  //
24  // As a special exception, the copyright holders of this library give you
25  // permission to link this library with independent modules to produce an
26  // executable, regardless of the license terms of these independent modules, and
27  // to copy and distribute the resulting executable under terms of your choice,
28  // provided that you also meet, for each linked independent module, the terms
29  // and conditions of the license of that module. An independent module is a module
30  // which is not derived from or based on this library. If you modify this library,
31  // you may extend this exception to your version of the library, but you are not
32  // obligated to do so. If you do not wish to do so, delete this exception
33  // statement from your version.
34  //
35  // A copy of the GNU General Public License is provided in the file gpl.txt. You
36  // may also obtain a copy of the GNU General Public License on the World Wide
37  // Web at http://www.gnu.org/licenses/gpl.html.
38  //
39  //******************************************************************************
40  package edu.rit.util;
41  
42  import java.util.HashMap;
43  import java.util.Iterator;
44  import java.util.Map;
45  import java.util.NoSuchElementException;
46  
47  /**
48   * Class RandomSubset provides an object that generates a random subset of a set
49   * of integers. The original set consists of the integers from 0 to
50   * <I>N</I>&minus;1 inclusive. The subset consists of integers chosen at random
51   * without replacement from the original set; each member of the original set is
52   * equally likely to be chosen. Class RandomSubset is an Iterator that visits
53   * the elements of the original set in a random order; each <code>next()</code>
54   * method call returns another element of the subset.
55   * <P>
56   * Calling the <code>remove(i)</code> method one or more times removes the given
57   * integers from the original set. Those integers will not be chosen for the
58   * random subset.
59   * <P>
60   * Class RandomSubset is layered on top of a pseudorandom number generator
61   * (PRNG), an instance of class {@linkplain edu.rit.util.Random Random}. Each
62   * time the <code>next()</code> method is called, one random number is consumed from
63   * the underlying PRNG.
64   * <P>
65   * Class RandomSubset has two different implementations with different storage
66   * requirements. The implementation is specified with a constructor argument.
67   * <UL>
68   *
69   * <LI>
70   * The <I>sparse</I> implementation requires less storage when the size of the
71   * random subset (the number of <code>next()</code> method calls) is a small
72   * fraction of the size of the original set (<I>N</I>).
73   *
74   * <LI>
75   * The <I>dense</I> implementation requires less storage when the size of the
76   * random subset is a large fraction of the size of the original set.
77   * </UL>
78   *
79   * @author Alan Kaminsky
80   * @version 22-Nov-2011
81   */
82  public class RandomSubset
83          implements Iterator<Integer> {
84  
85  // Hidden data members.
86      // The underlying PRNG.
87      private Random prng;
88  
89      // The size of the original set.
90      private int N;
91  
92      // The number of random subset elements returned so far.
93      private int M;
94  
95      // Helper class.
96      private Helper helper;
97  
98  // Hidden helper classes.
99      // Helper abstract base class.
100     private abstract static class Helper {
101 
102         // Returns the element in the permutation array at index i.
103         public abstract int getElement(int i);
104 
105         // Sets the element in the permutation array at index i to the given
106         // value.
107         public abstract void setElement(int i, int value);
108 
109         // Swaps the elements in the permutation array at indexes i and j.
110         public abstract void swapElements(int i, int j);
111 
112         // Returns the index in the permutation array at which the given value
113         // resides.
114         public abstract int indexOf(int value);
115 
116         // Restarts the iteration.
117         public abstract void restart();
118     }
119 
120     // Sparse implementation helper class.
121     private static class SparseHelper
122             extends Helper {
123         // A sparse array containing a permutation of the integers from 0 to
124         // N-1. Implemented as a mapping from array index to array element. If
125         // an array index is not in the map, the corresponding array element is
126         // the same as the array index.
127 
128         private final HashMap<Integer, Integer> permutation = new HashMap<>();
129 
130         // Returns the element in the permutation array at index i.
131         public int getElement(int i) {
132             Integer element = permutation.get(i);
133             return element == null ? i : element;
134         }
135 
136         // Sets the element in the permutation array at index i to the given
137         // value.
138         public void setElement(int i, int value) {
139             if (value == i) {
140                 permutation.remove(i);
141             } else {
142                 permutation.put(i, value);
143             }
144         }
145 
146         // Swaps the elements in the permutation array at indexes i and j.
147         public void swapElements(int i, int j) {
148             int tmp = getElement(i);
149             setElement(i, getElement(j));
150             setElement(j, tmp);
151         }
152 
153         // Returns the index in the permutation array at which the given value
154         // resides.
155         public int indexOf(int value) {
156             for (Map.Entry<Integer, Integer> entry : permutation.entrySet()) {
157                 if (entry.getValue() == value) {
158                     return entry.getKey();
159                 }
160             }
161             return value;
162         }
163 
164         // Restarts the iteration.
165         public void restart() {
166             permutation.clear();
167         }
168     }
169 
170     // Dense implementation helper class.
171     private class DenseHelper
172             extends Helper {
173         // A dense array containing a permutation of the integers from 0 to
174         // N-1.
175 
176         private int[] permutation = new int[N];
177 
178         // Construct a new dense helper.
179         public DenseHelper() {
180             restart();
181         }
182 
183         // Returns the element in the permutation array at index i.
184         public int getElement(int i) {
185             return permutation[i];
186         }
187 
188         // Sets the element in the permutation array at index i to the given
189         // value.
190         public void setElement(int i,
191                 int value) {
192             permutation[i] = value;
193         }
194 
195         // Swaps the elements in the permutation array at indexes i and j.
196         public void swapElements(int i,
197                 int j) {
198             int tmp = permutation[i];
199             permutation[i] = permutation[j];
200             permutation[j] = tmp;
201         }
202 
203         // Returns the index in the permutation array at which the given value
204         // resides.
205         public int indexOf(int value) {
206             int i = 0;
207             while (permutation[i] != value) {
208                 ++i;
209             }
210             return i;
211         }
212 
213         // Restarts the iteration.
214         public void restart() {
215             for (int i = 0; i < N; ++i) {
216                 permutation[i] = i;
217             }
218         }
219     }
220 
221 // Exported constructors.
222     /**
223      * Construct a new random subset object for the original set consisting of
224      * the integers from 0 through <I>N</I>&minus;1 inclusive. The sparse
225      * implementation is used.
226      *
227      * @param prng Underlying PRNG.
228      * @param N Size of original set.
229      * @exception NullPointerException (unchecked exception) Thrown if
230      * <code>prng</code> is null.
231      * @exception IllegalArgumentException (unchecked exception) Thrown if
232      * <I>N</I> &lt; 0.
233      */
234     public RandomSubset(Random prng,
235             int N) {
236         this(prng, N, false);
237     }
238 
239     /**
240      * Construct a new random subset object for the original set consisting of
241      * the integers from 0 through <I>N</I>&minus;1 inclusive. If <code>dense</code>
242      * is false, the sparse implementation is used. If <code>dense</code> is true,
243      * the dense implementation is used.
244      *
245      * @param prng Underlying PRNG.
246      * @param N Size of original set.
247      * @param dense False to use the sparse implementation, true to use the
248      * dense implementation.
249      * @exception NullPointerException (unchecked exception) Thrown if
250      * <code>prng</code> is null.
251      * @exception IllegalArgumentException (unchecked exception) Thrown if
252      * <I>N</I> &lt; 0.
253      */
254     public RandomSubset(Random prng,
255             int N,
256             boolean dense) {
257         if (prng == null) {
258             throw new NullPointerException("RandomSubset(): prng is null");
259         }
260         if (N < 0) {
261             throw new IllegalArgumentException("RandomSubset(): N = " + N + " illegal");
262         }
263         this.prng = prng;
264         this.N = N;
265         this.helper = dense ? new DenseHelper() : new SparseHelper();
266     }
267 
268 // Exported operations.
269     /**
270      * Determine whether there are more integers in the random subset.
271      *
272      * @return True if there are more integers in the random subset, false if
273      * all the integers in the original set have been used up.
274      */
275     public boolean hasNext() {
276         return M < N;
277     }
278 
279     /**
280      * Returns the next integer in the random subset.
281      *
282      * @return Next integer in the random subset.
283      * @exception NoSuchElementException (unchecked exception) Thrown if all the
284      * integers in the original set have been used up.
285      */
286     public Integer next() {
287         if (M >= N) {
288             throw new NoSuchElementException("RandomSubset.next(): No further elements");
289         }
290         helper.swapElements(M, M + prng.nextInt(N - M));
291         ++M;
292         return helper.getElement(M - 1);
293     }
294 
295     /**
296      * Unsupported operation.
297      *
298      * @exception UnsupportedOperationException (unchecked exception) Thrown
299      * always.
300      */
301     public void remove() {
302         throw new UnsupportedOperationException();
303     }
304 
305     /**
306      * Remove the given integer from the original set.
307      * <P>
308      * If <code>i</code> has already been removed from the original set, either by a
309      * <code>remove(i)</code> method call, or by a <code>next()</code> method call that
310      * returned <code>i</code>, then this method does nothing.
311      *
312      * @param i Integer to remove.
313      * @return This random subset object.
314      * @exception IllegalArgumentException (unchecked exception) Thrown if
315      * <code>i</code> is not in the range 0 through <I>N</I>&minus;1 inclusive.
316      */
317     public RandomSubset remove(int i) {
318         if (0 > i || i >= N) {
319             throw new IllegalArgumentException("RandomSubset.remove(): i = " + i + " illegal");
320         }
321         int j = helper.indexOf(i);
322         if (j >= M) {
323             helper.swapElements(M, j);
324             ++M;
325         }
326         return this;
327     }
328 
329     /**
330      * Restart this random subset's iteration. The original set is reset to
331      * contain all the integers from 0 through <I>N</I>&minus;1 inclusive. If
332      * integers had been removed from the original set by calling the
333      * <code>remove(i)</code> method, those integers must be removed again by
334      * calling the <code>remove(i)</code> method again. The iteration is restarted,
335      * and calling the <code>next()</code> method will yield a different random
336      * subset.
337      * <P>
338      * The <code>restart()</code> method lets you generate multiple random subsets
339      * from the same original set using the same RandomSubset object. This
340      * avoids the multiple storage allocations that would take place when
341      * creating multiple RandomSubset objects. This in turn can reduce the
342      * running time, especially when <I>N</I> is large.
343      */
344     public void restart() {
345         M = 0;
346         helper.restart();
347     }
348 
349 // Unit test main program.
350 //	/**
351 //	 * Unit test main program.
352 //	 * <P>
353 //	 * Usage: java edu.rit.util.RandomSubset <I>impl</I> <I>seed</I> <I>N</I>
354 //	 * <I>M</I> [ <I>i</I> ... ]
355 //	 * <BR><I>impl</I> = "sparse" or "dense"
356 //	 * <BR><I>seed</I> = Random seed
357 //	 * <BR><I>N</I> = Size of original set
358 //	 * <BR><I>M</I> = Size of random subset
359 //	 * <BR><I>i</I> = Integer(s) to remove from original set
360 //	 */
361 //	public static void main
362 //		(String[] args)
363 //		{
364 //		if (args.length < 3) usage();
365 //		boolean dense = args[0].equals ("dense");
366 //		long seed = Long.parseLong (args[1]);
367 //		int N = Integer.parseInt (args[2]);
368 //		int M = Integer.parseInt (args[3]);
369 //		RandomSubset rs =
370 //			new RandomSubset (Random.getInstance (seed), N, dense);
371 //		for (int r = 0; r < 4; ++ r)
372 //			{
373 //			rs.restart();
374 //			for (int j = 3; j < args.length; ++ j)
375 //				{
376 //				rs.remove (Integer.parseInt (args[j]));
377 //				}
378 //			for (int j = 0; j < M; ++ j)
379 //				{
380 //				System.out.printf ("%d  ", rs.next());
381 //				}
382 //			System.out.println();
383 //			}
384 //		}
385 //
386 //	private static void usage()
387 //		{
388 //		System.err.println ("Usage: java edu.rit.util.RandomSubset <impl> <seed> <N> <M> [<i> ...]");
389 //		System.err.println ("<impl> = \"sparse\" or \"dense\"");
390 //		System.err.println ("<seed> = Random seed");
391 //		System.err.println ("<N> = Size of original set");
392 //		System.err.println ("<M> = Size of random subset");
393 //		System.err.println ("<i> = Integer(s) to remove from original set");
394 //		System.exit (1);
395 //		}
396 }