Skip to content

Commit

Permalink
scaladoc #23 #25
Browse files Browse the repository at this point in the history
  • Loading branch information
vsuthichai committed Aug 18, 2016
1 parent 21b8ed1 commit ac7a096
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ import scala.collection.mutable
import scala.util.Random

/**
* Sample a combination of K items, with or without replacement, from an
* iterable of length N. The combination returned will never be an empty set.
* Samples are returned in lexicographical order.
*
* @param iterable
* @param k
* @param replacement
* @tparam T
* @param iterable an iterable of type T
* @param k number of items to choose from the iterable
* @param replacement boolean indicating sample with or without replacement
* @param ord Ordering of type T
* @tparam T element type of iterable
*/
abstract class AbstractCombinations[T](
iterable: Iterable[T],
Expand All @@ -17,12 +21,12 @@ abstract class AbstractCombinations[T](

protected val values = iterable.toSeq

def sample(rng: Random): Iterable[T] = {
protected def sample(rng: Random): Iterable[T] = {
if (replacement) sampleWithReplacement(rng)
else sampleNoReplacement(rng)
}

def sampleWithReplacement(rng: Random) = {
protected def sampleWithReplacement(rng: Random) = {
val combo = new mutable.PriorityQueue[T]()

while (combo.size < k) {
Expand All @@ -33,7 +37,7 @@ abstract class AbstractCombinations[T](
combo.toIndexedSeq
}

def sampleNoReplacement(rng: Random) = {
protected def sampleNoReplacement(rng: Random) = {
val combo = mutable.SortedSet[T]()
val indices = mutable.Set[Int]()

Expand All @@ -50,12 +54,13 @@ abstract class AbstractCombinations[T](
}

/**
* Sample a single combination of K unordered items from the iterable of length N.
* Sample a single combination of K items from the iterable of length N.
*
* @param iterable
* @param k
* @param replacement
* @tparam T
* @param iterable an iterable of type T
* @param k the number of items to sample from the iterable of length N
* @param replacement boolean indicating whether to sample with replacement
* @param ord Ordering of type T
* @tparam T element type of iterable
*/
case class Combination[T](
iterable: Iterable[T],
Expand All @@ -71,14 +76,16 @@ case class Combination[T](


/**
* Binomial coefficient implementation. Pick K unordered items from an Iterable of N items.
* Binomial coefficient implementation. Choose K items from an Iterable of N items.
* Also known as N Choose K, where N is the size of an Iterable and K is the desired number
* of items to be chosen. This implementation will actually compute all the possible choices
* and return them as an Iterable.
* of items to be chosen. Items will always be returned in lexicographical order.
*
* @param iterable an iterable of finite length
* @param k the number of items to choose
* @tparam T
* @param k the number of items to sample from the iterable of length N to form a combination
* @param x number of combinations to sample
* @param replacement boolean indicating whether to sample with replacement
* @param ord Ordering of type T
* @tparam T element type of iterable
*/
case class Combinations[T](
iterable: Iterable[T],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,37 @@ import scala.collection.mutable
import scala.util.Random

/**
* Sample a subset of size up to K from an iterable of length N, with or without replacement.
*
* @param iterable
* @param k
* @param replacement
* @param ord
* @tparam T
* @param iterable an iterable of length N
* @param K number of items sampled, up to K, chosen randomly between 1 and K
* @param replacement boolean indicating sample with or without replacement
* @param ord ordering of type T
* @tparam T element type of iterable
*/
abstract class AbstractSubset[T](iterable: Iterable[T], k: Int, replacement: Boolean = false)(implicit ord: Ordering[T]) extends Serializable {
abstract class AbstractSubset[T](
iterable: Iterable[T],
K: Int,
replacement: Boolean = false)
(implicit ord: Ordering[T])
extends Serializable {

protected val values = iterable.toIndexedSeq

def sampleWithReplacement(rng: Random): Iterable[T] = {
val sampleSize = rng.nextInt(k) + 1
protected def sampleWithReplacement(rng: Random): Iterable[T] = {
val sampleSize = rng.nextInt(K) + 1
val subset = new mutable.PriorityQueue[T]()

while (subset.size < k) {
while (subset.size < K) {
val index = rng.nextInt(values.length)
subset += values(index)
}

subset.toIndexedSeq
}

def sampleNoReplacement(rng: Random): Iterable[T] = {
val sampleSize = rng.nextInt(k) + 1
protected def sampleNoReplacement(rng: Random): Iterable[T] = {
val sampleSize = rng.nextInt(K) + 1
val subset = mutable.SortedSet[T]()
val indices = mutable.Set[Int]()

Expand All @@ -43,61 +50,58 @@ abstract class AbstractSubset[T](iterable: Iterable[T], k: Int, replacement: Boo
subset.toIndexedSeq
}

def sample(rng: Random): Iterable[T] = {
protected def sample(rng: Random): Iterable[T] = {
if (replacement) sampleWithReplacement(rng)
else sampleNoReplacement(rng)
}
}

/**
* Sample a single subset.
*
* @param iterable
* @param k
* @param replacement
* @param ord
* @tparam T
* @param iterable an iterable of length N
* @param K number of items sampled, up to K, chosen randomly between 1 and K
* @param replacement boolean indicating sample with or without replacement
* @param ord ordering of type T
* @tparam T element type of iterable
*/
case class Subset[T](
iterable: Iterable[T],
k: Int,
K: Int,
replacement: Boolean = false)
(implicit ord: Ordering[T])
extends AbstractSubset[T](iterable, k, replacement)
extends AbstractSubset[T](iterable, K, replacement)
with IterableRandomSampler[T] {

assert(k > 0 && k <= values.size, "K must be in the interval (0, N]")
assert(K > 0 && K <= values.size, "K must be in the interval (0, N]")

def apply(rng: Random): Iterable[T] = sample(rng)
}

/**
* Sample many subsets, up to X subsets.
*
* @param iterable
* @param k
* @param x
* @param replacement
* @param ord
* @tparam T
* @param iterable an iterable of length N
* @param K number of items sampled, up to K, chosen randomly between 1 and K
* @param X number of subsets to sample
* @param replacement boolean indicating sample with or without replacement
* @param ord ordering of type T
* @tparam T element type of iterable
*/
case class Subsets[T](
iterable: Iterable[T],
k: Int,
x: Int,
K: Int,
X: Int,
replacement: Boolean = false)
(implicit ord: Ordering[T])
extends AbstractSubset[T](iterable, k, replacement)
extends AbstractSubset[T](iterable, K, replacement)
with CombinatoricRandomSampler[T] {

assert(k > 0 && k <= values.size, "K must be in the interval (0, N]")
assert(x > 0, "X must be greater than 0")
assert(K > 0 && K <= values.size, "K must be in the interval (0, N]")
assert(X > 0, "X must be greater than 0")

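/**
* Sample a random number of subsets, between 1 and X.
*
* @param rng random number generator used to pick the number of subsets and to draw each subset
* @return the sampled subsets
*/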
def apply(rng: Random): Iterable[Iterable[T]] = {
val numSubsets = rng.nextInt(x) + 1
val numSubsets = rng.nextInt(X) + 1

if (replacement) {
Seq.fill(numSubsets)(sample(rng))
Expand Down

0 comments on commit ac7a096

Please sign in to comment.